mirror of
https://github.com/sampletext32/ParkanPlayground.git
synced 2025-07-01 04:40:25 +03:00
Fix address conversion in BlockDisassembler to properly handle RVA addresses and ensure entry blocks are correctly identified
This commit is contained in:
@ -1,13 +1,41 @@
|
||||
namespace X86Disassembler.Analysers;
|
||||
namespace X86Disassembler.Analysers;
|
||||
|
||||
/// <summary>
|
||||
/// Represents a disassembled function with its control flow graph
|
||||
/// </summary>
|
||||
public class AsmFunction
{
    /// <summary>
    /// The starting address of the function
    /// </summary>
    public ulong Address { get; set; }

    /// <summary>
    /// The list of basic blocks that make up the function
    /// </summary>
    public List<InstructionBlock> Blocks { get; set; } = [];

    /// <summary>
    /// The entry block of the function: the first block whose address equals <see cref="Address"/>,
    /// or null when no such block exists
    /// </summary>
    public InstructionBlock? EntryBlock => Blocks.FirstOrDefault(b => b.Address == Address);

    /// <summary>
    /// The exit blocks of the function (non-empty blocks whose last instruction is a return).
    /// A new list is built on every access.
    /// </summary>
    public List<InstructionBlock> ExitBlocks => Blocks.Where(b =>
        b.Instructions.Count > 0 &&
        b.Instructions[^1].Type.IsRet()).ToList();

    /// <summary>
    /// Returns a string representation of the function: its address, entry block,
    /// exit blocks, block count, and every block on its own tab-indented line
    /// </summary>
    public override string ToString()
    {
        // Both properties scan Blocks on each access, so evaluate them once.
        var entryBlock = EntryBlock;
        var exitBlocks = ExitBlocks;

        // Print a plain "None" rather than "0xNone" when there is no entry block.
        string entryText = entryBlock is null ? "None" : $"0x{entryBlock.Address:X8}";

        string exitText = exitBlocks.Count > 0
            ? string.Join(", ", exitBlocks.Select(b => $"0x{b.Address:X8}"))
            : "None";

        return $"Function at 0x{Address:X8}\n" +
               $"Entry Block: {entryText}\n" +
               $"Exit Blocks: {exitText}\n" +
               $"Total Blocks: {Blocks.Count}\n" +
               $"{string.Join("\n", Blocks.Select(x => $"\t{x}"))}";
    }
}
|
@ -1,4 +1,4 @@
|
||||
using X86Disassembler.X86;
|
||||
using X86Disassembler.X86;
|
||||
|
||||
namespace X86Disassembler.Analysers;
|
||||
|
||||
@ -47,11 +47,21 @@ public class BlockDisassembler
|
||||
|
||||
// Queue of addresses to process (breadth-first approach)
|
||||
Queue<ulong> addressQueue = [];
|
||||
|
||||
// Calculate the file offset from the RVA by subtracting the base address
|
||||
addressQueue.Enqueue(rvaAddress - _baseAddress);
|
||||
// Store the file offset for processing, but we'll convert back to RVA when creating blocks
|
||||
ulong fileOffset = rvaAddress - _baseAddress;
|
||||
addressQueue.Enqueue(fileOffset);
|
||||
|
||||
// Keep track of the original entry point RVA for the function
|
||||
ulong entryPointRVA = rvaAddress;
|
||||
|
||||
// List to store discovered basic blocks
|
||||
List<InstructionBlock> blocks = [];
|
||||
|
||||
// Dictionary to track blocks by address for quick lookup
|
||||
Dictionary<ulong, InstructionBlock> blocksByAddress = new Dictionary<ulong, InstructionBlock>();
|
||||
|
||||
while (addressQueue.Count > 0)
|
||||
{
|
||||
// Get the next address to process
|
||||
@ -70,15 +80,35 @@ public class BlockDisassembler
|
||||
// Collect instructions for this block
|
||||
List<Instruction> instructions = [];
|
||||
|
||||
// Get the current block if it exists (for tracking predecessors)
|
||||
InstructionBlock? currentBlock = null;
|
||||
if (blocksByAddress.TryGetValue(address, out var existingBlock))
|
||||
{
|
||||
currentBlock = existingBlock;
|
||||
}
|
||||
|
||||
// Process instructions until we hit a control flow change
|
||||
while (true)
|
||||
{
|
||||
// Get the current position
|
||||
ulong currentPosition = (ulong)decoder.GetPosition();
|
||||
|
||||
// If we've stepped onto an existing block, create a new block up to this point
|
||||
// and stop processing this path (to avoid duplicating instructions)
|
||||
if (blocks.Any(x => x.Address == (ulong) decoder.GetPosition()))
|
||||
if (blocksByAddress.TryGetValue(currentPosition, out var targetBlock) && currentPosition != address)
|
||||
{
|
||||
Console.WriteLine("Stepped on to existing block. Creating in the middle");
|
||||
RegisterBlock(blocks, address, instructions);
|
||||
|
||||
// Register this block and establish the relationship with the target block
|
||||
var newBlock = RegisterBlock(blocks, address, instructions, null, false, false);
|
||||
blocksByAddress[address] = newBlock;
|
||||
|
||||
// Add the target block as a successor to the new block
|
||||
newBlock.Successors.Add(targetBlock);
|
||||
|
||||
// Add the new block as a predecessor to the target block
|
||||
targetBlock.Predecessors.Add(newBlock);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
@ -98,17 +128,22 @@ public class BlockDisassembler
|
||||
// For conditional jumps, we need to follow both the jump target and the fall-through path
|
||||
if (instruction.Type.IsConditionalJump())
|
||||
{
|
||||
// Get the jump target address
|
||||
uint jumpTargetAddress = instruction.StructuredOperands[0].GetValue();
|
||||
|
||||
// Get the fall-through address (next instruction after this jump)
|
||||
uint fallThroughAddress = (uint)decoder.GetPosition();
|
||||
|
||||
// Register this block (it ends with a conditional jump)
|
||||
RegisterBlock(blocks, address, instructions);
|
||||
var newBlock = RegisterBlock(blocks, address, instructions, currentBlock, false, false);
|
||||
blocksByAddress[address] = newBlock;
|
||||
|
||||
// Queue the jump target address for processing
|
||||
addressQueue.Enqueue(
|
||||
instruction.StructuredOperands[0]
|
||||
.GetValue()
|
||||
);
|
||||
addressQueue.Enqueue(jumpTargetAddress);
|
||||
|
||||
// Queue the fall-through address (next instruction after this jump)
|
||||
addressQueue.Enqueue((uint) decoder.GetPosition());
|
||||
addressQueue.Enqueue(fallThroughAddress);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
@ -116,14 +151,16 @@ public class BlockDisassembler
|
||||
// For unconditional jumps, we only follow the jump target
|
||||
if (instruction.Type.IsRegularJump())
|
||||
{
|
||||
// Get the jump target address
|
||||
uint jumpTargetAddress = instruction.StructuredOperands[0].GetValue();
|
||||
|
||||
// Register this block (it ends with an unconditional jump)
|
||||
RegisterBlock(blocks, address, instructions);
|
||||
var newBlock = RegisterBlock(blocks, address, instructions, currentBlock, false, false);
|
||||
blocksByAddress[address] = newBlock;
|
||||
|
||||
// Queue the jump target address for processing
|
||||
addressQueue.Enqueue(
|
||||
instruction.StructuredOperands[0]
|
||||
.GetValue()
|
||||
);
|
||||
addressQueue.Enqueue(jumpTargetAddress);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
@ -132,7 +169,9 @@ public class BlockDisassembler
|
||||
if (instruction.Type.IsRet())
|
||||
{
|
||||
// Register this block (it ends with a return)
|
||||
RegisterBlock(blocks, address, instructions);
|
||||
var newBlock = RegisterBlock(blocks, address, instructions, currentBlock, false, false);
|
||||
blocksByAddress[address] = newBlock;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -142,11 +181,41 @@ public class BlockDisassembler
|
||||
// we need to sort the blocks ourselves
|
||||
blocks.Sort((b1, b2) => b1.Address.CompareTo(b2.Address));
|
||||
|
||||
return new AsmFunction()
|
||||
// Convert all block addresses from file offsets to RVA
|
||||
foreach (var block in blocks)
|
||||
{
|
||||
Address = rvaAddress,
|
||||
// Convert from file offset to RVA by adding the base address
|
||||
ulong rvaBlockAddress = block.Address + _baseAddress;
|
||||
Console.WriteLine($"Converting block address from file offset 0x{block.Address:X8} to RVA 0x{rvaBlockAddress:X8}");
|
||||
block.Address = rvaBlockAddress;
|
||||
}
|
||||
|
||||
// Create a new AsmFunction with the RVA address
|
||||
var asmFunction = new AsmFunction()
|
||||
{
|
||||
Address = entryPointRVA,
|
||||
Blocks = blocks,
|
||||
};
|
||||
|
||||
// Verify that the entry block exists
|
||||
var entryBlock = asmFunction.EntryBlock;
|
||||
if (entryBlock == null)
|
||||
{
|
||||
Console.WriteLine($"Warning: No entry block found at RVA 0x{entryPointRVA:X8}");
|
||||
|
||||
// Try to find a block at the file offset address (for backward compatibility)
|
||||
var fallbackBlock = blocks.FirstOrDefault(b => b.Address == (fileOffset + _baseAddress));
|
||||
if (fallbackBlock != null)
|
||||
{
|
||||
Console.WriteLine($"Found fallback entry block at RVA 0x{fallbackBlock.Address:X8}");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine($"Found entry block at RVA 0x{entryBlock.Address:X8}");
|
||||
}
|
||||
|
||||
return asmFunction;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@ -155,8 +224,42 @@ public class BlockDisassembler
|
||||
/// <param name="blocks">The list of blocks to add to</param>
|
||||
/// <param name="address">The starting address of the block</param>
|
||||
/// <param name="instructions">The instructions contained in the block</param>
|
||||
public void RegisterBlock(List<InstructionBlock> blocks, ulong address, List<Instruction> instructions)
|
||||
/// <param name="currentBlock">The current block being processed (null if this is the first block)</param>
|
||||
/// <param name="isJumpTarget">Whether this block is a jump target</param>
|
||||
/// <param name="isFallThrough">Whether this block is a fall-through from another block</param>
|
||||
/// <returns>The newly created block</returns>
|
||||
public InstructionBlock RegisterBlock(
|
||||
List<InstructionBlock> blocks,
|
||||
ulong address,
|
||||
List<Instruction> instructions,
|
||||
InstructionBlock? currentBlock = null,
|
||||
bool isJumpTarget = false,
|
||||
bool isFallThrough = false)
|
||||
{
|
||||
// Check if a block already exists at this address
|
||||
var existingBlock = blocks.FirstOrDefault(b => b.Address == address);
|
||||
|
||||
if (existingBlock != null)
|
||||
{
|
||||
// If the current block is not null, update the relationships
|
||||
if (currentBlock != null)
|
||||
{
|
||||
// Add the existing block as a successor to the current block if not already present
|
||||
if (!currentBlock.Successors.Contains(existingBlock))
|
||||
{
|
||||
currentBlock.Successors.Add(existingBlock);
|
||||
}
|
||||
|
||||
// Add the current block as a predecessor to the existing block if not already present
|
||||
if (!existingBlock.Predecessors.Contains(currentBlock))
|
||||
{
|
||||
existingBlock.Predecessors.Add(currentBlock);
|
||||
}
|
||||
}
|
||||
|
||||
return existingBlock;
|
||||
}
|
||||
|
||||
// Create a new block with the provided address and instructions
|
||||
var block = new InstructionBlock()
|
||||
{
|
||||
@ -167,8 +270,20 @@ public class BlockDisassembler
|
||||
// Add the block to the collection
|
||||
blocks.Add(block);
|
||||
|
||||
// If the current block is not null, update the relationships
|
||||
if (currentBlock != null)
|
||||
{
|
||||
// Add the new block as a successor to the current block
|
||||
currentBlock.Successors.Add(block);
|
||||
|
||||
// Add the current block as a predecessor to the new block
|
||||
block.Predecessors.Add(currentBlock);
|
||||
}
|
||||
|
||||
// Log the created block for debugging
|
||||
Console.WriteLine($"Created block:\n{block}");
|
||||
|
||||
return block;
|
||||
}
|
||||
}
|
||||
|
||||
@ -185,13 +300,34 @@ public class InstructionBlock
|
||||
/// <summary>
|
||||
/// The list of instructions contained in this block
|
||||
/// </summary>
|
||||
public List<Instruction> Instructions { get; set; }
|
||||
public List<Instruction> Instructions { get; set; } = [];
|
||||
|
||||
/// <summary>
|
||||
/// Returns a string representation of the block, including its address and instructions
|
||||
/// The blocks that can transfer control to this block
|
||||
/// </summary>
|
||||
public List<InstructionBlock> Predecessors { get; set; } = [];
|
||||
|
||||
/// <summary>
|
||||
/// The blocks that this block can transfer control to
|
||||
/// </summary>
|
||||
public List<InstructionBlock> Successors { get; set; } = [];
|
||||
|
||||
/// <summary>
|
||||
/// Returns a string representation of the block, including its address, instructions, and control flow information
|
||||
/// </summary>
|
||||
public override string ToString()
{
    // Describe the incoming edges, listing each predecessor by address.
    string predecessorsStr = Predecessors.Count > 0
        ? $"Predecessors: {string.Join(", ", Predecessors.Select(p => $"0x{p.Address:X8}"))}"
        : "No predecessors";

    // Describe the outgoing edges, listing each successor by address.
    string successorsStr = Successors.Count > 0
        ? $"Successors: {string.Join(", ", Successors.Select(s => $"0x{s.Address:X8}"))}"
        : "No successors";

    // Address line, control flow lines, then one instruction per line.
    return $"Address: 0x{Address:X8}\n{predecessorsStr}\n{successorsStr}\n{string.Join("\n", Instructions)}";
}
|
||||
}
|
277
X86Disassembler/Analysers/ControlFlowAnalyzer.cs
Normal file
277
X86Disassembler/Analysers/ControlFlowAnalyzer.cs
Normal file
@ -0,0 +1,277 @@
|
||||
using X86Disassembler.Analysers.DecompilerTypes;
|
||||
using X86Disassembler.X86;
|
||||
using X86Disassembler.X86.Operands;
|
||||
|
||||
namespace X86Disassembler.Analysers;
|
||||
|
||||
/// <summary>
|
||||
/// Analyzes control flow structures in disassembled code
|
||||
/// </summary>
|
||||
public class ControlFlowAnalyzer
{
    /// <summary>
    /// The analyzer context used to look up blocks and to store analysis results
    /// </summary>
    private readonly AnalyzerContext _context;

    /// <summary>
    /// Creates a new control flow analyzer
    /// </summary>
    /// <param name="context">The analyzer context</param>
    public ControlFlowAnalyzer(AnalyzerContext context)
    {
        _context = context;
    }

    /// <summary>
    /// Analyzes the control flow of a function to identify high-level structures.
    /// If-else structures are identified first, then switch statements.
    /// </summary>
    /// <param name="function">The function to analyze</param>
    public void AnalyzeControlFlow(Function function)
    {
        IdentifyIfElseStructures(function);
        IdentifySwitchStatements(function);
    }

    /// <summary>
    /// Identifies if-else structures in the control flow graph and stores each one in the
    /// context under the key "IfElseStructure", keyed by the condition block's address
    /// </summary>
    /// <param name="function">The function to analyze</param>
    private void IdentifyIfElseStructures(Function function)
    {
        foreach (var block in function.AsmFunction.Blocks)
        {
            // Empty blocks cannot end with a conditional jump
            if (block.Instructions.Count == 0)
            {
                continue;
            }

            var lastInstruction = block.Instructions[^1];

            // Only blocks ending in a conditional jump (Jcc) can head an if-else
            if (!IsConditionalJump(lastInstruction.Type))
            {
                continue;
            }

            // The "then" branch is the jump target. Skip the block when the target cannot
            // be read, instead of looking up a placeholder address.
            // NOTE(review): the operand value is compared directly against the keys of
            // BlocksByAddress - confirm both use the same address space (RVA vs file offset).
            if (!TryGetJumpTargetAddress(lastInstruction, out ulong targetAddress))
            {
                continue;
            }

            if (!_context.BlocksByAddress.TryGetValue(targetAddress, out var targetBlock))
            {
                continue;
            }

            // The "else" branch is the fall-through block
            var fallthroughBlock = FindFallthroughBlock(block);
            if (fallthroughBlock == null)
            {
                continue;
            }

            var ifElseStructure = new IfElseStructure
            {
                ConditionBlock = block,
                ThenBlock = targetBlock,
                ElseBlock = fallthroughBlock
            };

            _context.StoreAnalysisData(block.Address, "IfElseStructure", ifElseStructure);
        }
    }

    /// <summary>
    /// Identifies switch statements in the control flow graph and stores each one in the
    /// context under the key "SwitchStructure", keyed by the header block's address
    /// </summary>
    /// <param name="function">The function to analyze</param>
    private void IdentifySwitchStatements(Function function)
    {
        foreach (var block in function.AsmFunction.Blocks)
        {
            // Switch statements commonly compile to either:
            // 1. A series of compare and jump instructions
            // 2. An indirect jump through a jump table
            // Only the second pattern is detected for now (see IsPotentialSwitchHeader)
            if (!IsPotentialSwitchHeader(block))
            {
                continue;
            }

            var switchStructure = new SwitchStructure
            {
                HeaderBlock = block,
                Cases = []
            };

            // Each successor of the header is treated as a potential case
            foreach (var successor in block.Successors)
            {
                switchStructure.Cases.Add(new SwitchCase
                {
                    CaseBlock = successor,
                    Value = 0 // We'd need more analysis to determine the actual value
                });
            }

            _context.StoreAnalysisData(block.Address, "SwitchStructure", switchStructure);
        }
    }

    /// <summary>
    /// Checks if the given instruction type is one of the conditional jumps recognised here
    /// </summary>
    /// <param name="type">The instruction type</param>
    /// <returns>True if the instruction is a conditional jump, false otherwise</returns>
    private bool IsConditionalJump(InstructionType type)
    {
        return type is InstructionType.Jz
            or InstructionType.Jnz
            or InstructionType.Jg
            or InstructionType.Jge
            or InstructionType.Jl
            or InstructionType.Jle
            or InstructionType.Ja
            or InstructionType.Jae
            or InstructionType.Jb
            or InstructionType.Jbe
            or InstructionType.Jo
            or InstructionType.Jno
            or InstructionType.Js
            or InstructionType.Jns
            or InstructionType.Jp
            or InstructionType.Jnp;
    }

    /// <summary>
    /// Tries to read the target address of a jump instruction from its first operand
    /// </summary>
    /// <param name="instruction">The jump instruction</param>
    /// <param name="targetAddress">The target address when the first operand is an immediate; otherwise 0</param>
    /// <returns>True if the first operand is an immediate operand, false otherwise</returns>
    private bool TryGetJumpTargetAddress(Instruction instruction, out ulong targetAddress)
    {
        if (instruction.StructuredOperands.Count > 0 &&
            instruction.StructuredOperands[0] is ImmediateOperand immOp)
        {
            targetAddress = (ulong)immOp.Value;
            return true;
        }

        targetAddress = 0;
        return false;
    }

    /// <summary>
    /// Gets the target address of a jump instruction
    /// </summary>
    /// <param name="instruction">The jump instruction</param>
    /// <returns>The target address of the jump, or 0 if it cannot be determined</returns>
    private ulong GetJumpTargetAddress(Instruction instruction)
    {
        return TryGetJumpTargetAddress(instruction, out ulong targetAddress) ? targetAddress : 0;
    }

    /// <summary>
    /// Finds the fallthrough block for a given block
    /// </summary>
    /// <param name="block">The block to find the fallthrough for</param>
    /// <returns>
    /// The successor with the lowest address above <paramref name="block"/>, or null if
    /// no successor lies above it
    /// </returns>
    private InstructionBlock? FindFallthroughBlock(InstructionBlock block)
    {
        // The fall-through block directly follows this block, so among the successors at
        // higher addresses it is the nearest one. Taking the first higher successor instead
        // could return the target of a forward jump.
        InstructionBlock? nearest = null;

        foreach (var successor in block.Successors)
        {
            if (successor.Address <= block.Address)
            {
                continue;
            }

            if (nearest == null || successor.Address < nearest.Address)
            {
                nearest = successor;
            }
        }

        return nearest;
    }

    /// <summary>
    /// Checks if the given block is a potential switch statement header: it has more than
    /// two successors and ends with a jmp whose first operand is not an immediate
    /// </summary>
    /// <param name="block">The block to check</param>
    /// <returns>True if the block is a potential switch header, false otherwise</returns>
    private bool IsPotentialSwitchHeader(InstructionBlock block)
    {
        // A switch header typically has multiple successors
        if (block.Successors.Count <= 2)
        {
            return false;
        }

        if (block.Instructions.Count == 0)
        {
            return false;
        }

        // A jmp with a non-immediate first operand is treated as an indirect jump
        var lastInstruction = block.Instructions[^1];

        return lastInstruction.Type == InstructionType.Jmp &&
               lastInstruction.StructuredOperands.Count > 0 &&
               lastInstruction.StructuredOperands[0] is not ImmediateOperand;
    }

    /// <summary>
    /// Represents an if-else structure in the control flow graph
    /// </summary>
    public class IfElseStructure
    {
        /// <summary>
        /// The block containing the condition
        /// </summary>
        public InstructionBlock ConditionBlock { get; set; } = null!;

        /// <summary>
        /// The block containing the 'then' branch
        /// </summary>
        public InstructionBlock ThenBlock { get; set; } = null!;

        /// <summary>
        /// The block containing the 'else' branch (may be null for if-then structures)
        /// </summary>
        public InstructionBlock ElseBlock { get; set; } = null!;
    }

    /// <summary>
    /// Represents a switch statement in the control flow graph
    /// </summary>
    public class SwitchStructure
    {
        /// <summary>
        /// The block containing the switch header
        /// </summary>
        public InstructionBlock HeaderBlock { get; set; } = null!;

        /// <summary>
        /// The cases of the switch statement
        /// </summary>
        public List<SwitchCase> Cases { get; set; } = [];
    }

    /// <summary>
    /// Represents a case in a switch statement
    /// </summary>
    public class SwitchCase
    {
        /// <summary>
        /// The value of the case
        /// </summary>
        public int Value { get; set; }

        /// <summary>
        /// The block containing the case code
        /// </summary>
        public InstructionBlock CaseBlock { get; set; } = null!;
    }
}
|
149
X86Disassembler/Analysers/DecompilerEngine.cs
Normal file
149
X86Disassembler/Analysers/DecompilerEngine.cs
Normal file
@ -0,0 +1,149 @@
|
||||
using X86Disassembler.Analysers.DecompilerTypes;
|
||||
using X86Disassembler.PE;
|
||||
using X86Disassembler.X86;
|
||||
|
||||
namespace X86Disassembler.Analysers;
|
||||
|
||||
/// <summary>
|
||||
/// Main engine for decompiling x86 code
|
||||
/// </summary>
|
||||
public class DecompilerEngine
{
    /// <summary>
    /// The PE file being analyzed
    /// </summary>
    private readonly PeFile _peFile;

    /// <summary>
    /// Cache of analyzed functions by address
    /// </summary>
    private readonly Dictionary<ulong, Function> _functions = [];

    /// <summary>
    /// Dictionary of exported function names by address
    /// </summary>
    private readonly Dictionary<ulong, string> _exportedFunctions = [];

    /// <summary>
    /// Creates a new decompiler engine for the specified PE file
    /// </summary>
    /// <param name="peFile">The PE file to decompile</param>
    /// <exception cref="ArgumentNullException"><paramref name="peFile"/> is null</exception>
    public DecompilerEngine(PeFile peFile)
    {
        ArgumentNullException.ThrowIfNull(peFile);

        _peFile = peFile;

        // Index export names by RVA; a later export at the same RVA replaces an earlier one
        foreach (var export in peFile.ExportedFunctions)
        {
            _exportedFunctions[export.AddressRva] = export.Name;
        }
    }

    /// <summary>
    /// Decompiles a function at the specified address
    /// </summary>
    /// <param name="address">The address of the function to decompile</param>
    /// <returns>The decompiled function (the cached instance if it was analyzed before)</returns>
    /// <exception cref="InvalidOperationException">No code section contains <paramref name="address"/></exception>
    public Function DecompileFunction(ulong address)
    {
        // Check if we've already analyzed this function
        if (_functions.TryGetValue(address, out var existingFunction))
        {
            return existingFunction;
        }

        // Find the code section containing this address. The upper bound is checked as an
        // offset from the section start so a large VirtualAddress + VirtualSize cannot wrap.
        int sectionIndex = _peFile.SectionHeaders.FindIndex(s =>
            s.ContainsCode() &&
            address >= s.VirtualAddress &&
            address - s.VirtualAddress < s.VirtualSize);

        if (sectionIndex < 0)
        {
            throw new InvalidOperationException($"No code section found containing address 0x{address:X8}");
        }

        var codeSection = _peFile.SectionHeaders[sectionIndex];

        // Get the section data
        byte[] codeBytes = _peFile.GetSectionData(sectionIndex);

        // Create a disassembler for the code section
        var disassembler = new BlockDisassembler(codeBytes, codeSection.VirtualAddress);

        // Disassemble the function
        // NOTE(review): the address is narrowed to uint here - confirm callers never pass
        // an address above uint.MaxValue.
        var asmFunction = disassembler.DisassembleFromAddress((uint)address);

        // Create an analyzer context
        var context = new AnalyzerContext(asmFunction);

        // Run the analyzers
        var loopAnalyzer = new LoopAnalyzer();
        loopAnalyzer.AnalyzeLoops(context);

        var dataFlowAnalyzer = new DataFlowAnalyzer();
        dataFlowAnalyzer.AnalyzeDataFlow(context);

        // Get the function name from exports if available
        string functionName = _exportedFunctions.TryGetValue(address, out var name)
            ? name
            : $"func_{address:X8}";

        // Analyze the function
        var functionAnalyzer = new FunctionAnalyzer(context);
        var function = functionAnalyzer.AnalyzeFunction(address, functionName);

        // Analyze control flow structures
        var controlFlowAnalyzer = new ControlFlowAnalyzer(context);
        controlFlowAnalyzer.AnalyzeControlFlow(function);

        // Store the function in our cache
        _functions[address] = function;

        return function;
    }

    /// <summary>
    /// Generates C-like pseudocode for a decompiled function
    /// </summary>
    /// <param name="function">The function to generate pseudocode for</param>
    /// <returns>The generated pseudocode</returns>
    public string GeneratePseudocode(Function function)
    {
        var generator = new PseudocodeGenerator();

        return generator.GeneratePseudocode(function);
    }

    /// <summary>
    /// Decompiles all exported functions in the PE file. Forwarded exports are skipped, and
    /// a failure on one export is logged to the console without stopping the others.
    /// </summary>
    /// <returns>
    /// The engine's function cache by address. This is the live cache, so it also holds any
    /// function decompiled earlier through <see cref="DecompileFunction"/>.
    /// </returns>
    public Dictionary<ulong, Function> DecompileAllExportedFunctions()
    {
        foreach (var export in _peFile.ExportedFunctions)
        {
            // Skip forwarded exports
            if (export.IsForwarder)
            {
                continue;
            }

            try
            {
                DecompileFunction(export.AddressRva);
            }
            catch (Exception ex)
            {
                // Best effort: report the failure and continue with the remaining exports
                Console.WriteLine($"Error decompiling function {export.Name} at 0x{export.AddressRva:X8}: {ex.Message}");
            }
        }

        return _functions;
    }
}
|
||||
|
||||
|
132
X86Disassembler/Analysers/FunctionAnalyzer.cs
Normal file
132
X86Disassembler/Analysers/FunctionAnalyzer.cs
Normal file
@ -0,0 +1,132 @@
|
||||
using X86Disassembler.Analysers.DecompilerTypes;
|
||||
using X86Disassembler.X86;
|
||||
using X86Disassembler.X86.Operands;
|
||||
|
||||
namespace X86Disassembler.Analysers;
|
||||
|
||||
/// <summary>
|
||||
/// Analyzes disassembled functions to identify variables, parameters, and control flow structures
|
||||
/// </summary>
|
||||
public class FunctionAnalyzer
{
    /// <summary>
    /// The analyzer context
    /// </summary>
    private readonly AnalyzerContext _context;

    /// <summary>
    /// Creates a new function analyzer
    /// </summary>
    /// <param name="context">The analyzer context</param>
    public FunctionAnalyzer(AnalyzerContext context)
    {
        _context = context;
    }

    /// <summary>
    /// Analyzes a function at the specified address: analyzes its stack variables,
    /// determines its calling convention, and assigns default types
    /// </summary>
    /// <param name="address">The address of the function</param>
    /// <param name="name">The name of the function; when null or empty, "func_" plus the hex address is used</param>
    /// <returns>The analyzed function</returns>
    public Function AnalyzeFunction(ulong address, string name = "")
    {
        // If no name is provided, generate one based on the address
        if (string.IsNullOrEmpty(name))
        {
            name = $"func_{address:X8}";
        }

        // Create a function object
        var function = new Function(name, address, _context.Function)
        {
            ReturnType = DataType.Unknown // Default to unknown return type
        };

        // Create a variable analyzer and analyze variables
        var variableAnalyzer = new VariableAnalyzer(_context);
        variableAnalyzer.AnalyzeStackVariables(function);

        // Determine the calling convention
        DetermineCallingConvention(function);

        // Infer parameter and return types
        InferTypes(function);

        return function;
    }

    /// <summary>
    /// Determines the calling convention of a function based on its behavior.
    /// The result is stdcall when any block ends with a ret that has a positive immediate
    /// operand, and cdecl otherwise.
    /// </summary>
    /// <param name="function">The function to analyze</param>
    private void DetermineCallingConvention(Function function)
    {
        // A ret with a positive immediate means the function cleans up its own stack,
        // which makes it likely stdcall; otherwise assume cdecl.
        bool cleansOwnStack = function.AsmFunction.Blocks
            .Any(block => IsStackCleaningReturn(block.Instructions.LastOrDefault()));

        function.CallingConvention = cleansOwnStack
            ? CallingConvention.Stdcall
            : CallingConvention.Cdecl;

        // Detecting thiscall (ECX used for this pointer) or fastcall (first two parameters
        // in ECX and EDX) would require more sophisticated analysis of register usage
    }

    /// <summary>
    /// Checks whether an instruction is a ret whose first operand is an immediate greater than zero
    /// </summary>
    /// <param name="instruction">The instruction to check (may be null for an empty block)</param>
    /// <returns>True if the instruction is such a ret, false otherwise</returns>
    private static bool IsStackCleaningReturn(Instruction? instruction)
    {
        return instruction != null &&
               instruction.Type == InstructionType.Ret &&
               instruction.StructuredOperands.Count > 0 &&
               instruction.StructuredOperands[0] is ImmediateOperand immOp &&
               immOp.Value > 0;
    }

    /// <summary>
    /// Infers types for parameters and local variables based on their usage.
    /// Currently every return value, parameter, and local variable is typed as int.
    /// </summary>
    /// <param name="function">The function to analyze</param>
    private void InferTypes(Function function)
    {
        // Real inference would require tracking how variables are used;
        // for now, we'll just set default types
        function.ReturnType = DataType.Int;

        foreach (var param in function.Parameters)
        {
            param.Type = DataType.Int;
        }

        foreach (var localVar in function.LocalVariables)
        {
            localVar.Type = DataType.Int;
        }
    }
}
|
385
X86Disassembler/Analysers/PseudocodeGenerator.cs
Normal file
385
X86Disassembler/Analysers/PseudocodeGenerator.cs
Normal file
@ -0,0 +1,385 @@
|
||||
using System.Text;
|
||||
using X86Disassembler.Analysers.DecompilerTypes;
|
||||
using X86Disassembler.X86;
|
||||
using X86Disassembler.X86.Operands;
|
||||
|
||||
namespace X86Disassembler.Analysers;
|
||||
|
||||
/// <summary>
|
||||
/// Generates C-like pseudocode from decompiled functions
|
||||
/// </summary>
|
||||
public class PseudocodeGenerator
|
||||
{
|
||||
/// <summary>
/// Generates C-like pseudocode for a decompiled function: the signature, the declarations
/// of stack and register variables, the structured body and a placeholder return statement
/// </summary>
/// <param name="function">The function to generate pseudocode for</param>
/// <returns>The generated pseudocode</returns>
public string GeneratePseudocode(Function function)
{
    var result = new StringBuilder();

    // Function signature
    string parameterList = string.Join(", ", function.Parameters.Select(p => $"{p.Type} {p.Name}"));
    result.AppendLine($"{function.ReturnType} {function.Name}({parameterList})")
        .AppendLine("{");

    // Stack-based local variable declarations
    foreach (var localVar in function.LocalVariables)
    {
        result.AppendLine($" {localVar.Type} {localVar.Name}; // Stack offset: {localVar.StackOffset}");
    }

    // Register variable declarations
    foreach (var regVar in function.RegisterVariables)
    {
        // A register variable is expected to carry its register, but a missing value
        // must not abort pseudocode generation for the whole function
        string registerName = regVar.Register is { } register
            ? RegisterMapper.GetRegisterName(register, 32)
            : "unknown";

        result.AppendLine($" {regVar.Type} {regVar.Name}; // Register: {registerName}");
    }

    // Separate the declarations from the body only when there are declarations
    if (function.LocalVariables.Count > 0 || function.RegisterVariables.Count > 0)
    {
        result.AppendLine();
    }

    // Generate the function body using control flow analysis
    GenerateFunctionBody(function, result, 1);

    // The return value is not recovered yet, so a placeholder is always emitted
    result.AppendLine()
        .AppendLine(" return 0; // Placeholder return value")
        .AppendLine("}");

    return result.ToString();
}
|
||||
|
||||
/// <summary>
/// Emits the function body by walking the control flow graph from the entry block
/// </summary>
/// <param name="function">The function to generate code for</param>
/// <param name="result">The string builder to append to</param>
/// <param name="indentLevel">The current indentation level</param>
private void GenerateFunctionBody(Function function, StringBuilder result, int indentLevel)
{
    var entryBlock = function.AsmFunction.EntryBlock;

    if (entryBlock is null)
    {
        var indent = new string(' ', indentLevel * 4);
        var blocks = function.AsmFunction.Blocks;

        // Without any blocks there is nothing to emit
        if (blocks.Count == 0)
        {
            result.AppendLine($"{indent}// Function body could not be decompiled - no blocks found");
            return;
        }

        // No block starts at the function address: fall back to the first block
        // and leave a warning in the output
        entryBlock = blocks[0];
        result.AppendLine($"{indent}// Warning: Entry block not found at address 0x{function.Address:X8}, using first block at 0x{entryBlock.Address:X8}");
    }

    // Walk the graph from the entry block; the set prevents emitting a block twice
    GenerateBlockCode(function, entryBlock, result, indentLevel, new HashSet<ulong>());
}
|
||||
|
||||
/// <summary>
/// Emits code for a basic block, dispatching to the if-else, switch or loop generator
/// when the block heads such a structure and to the regular generator otherwise
/// </summary>
/// <param name="function">The function containing the block</param>
/// <param name="block">The block to generate code for</param>
/// <param name="result">The string builder to append to</param>
/// <param name="indentLevel">The current indentation level</param>
/// <param name="processedBlocks">Set of blocks that have already been processed</param>
private void GenerateBlockCode(Function function, InstructionBlock block, StringBuilder result, int indentLevel, HashSet<ulong> processedBlocks)
{
    var address = block.Address;

    // Add returns false when the address is already in the set, i.e. the block was handled before
    if (!processedBlocks.Add(address))
    {
        return;
    }

    var context = function.AsmFunction.Context;

    // Condition block of an if-else structure?
    var ifElse = context.GetAnalysisData<ControlFlowAnalyzer.IfElseStructure>(address, "IfElseStructure");
    if (ifElse is not null && ifElse.ConditionBlock.Address == address)
    {
        GenerateIfElseCode(function, ifElse, result, indentLevel, processedBlocks);
        return;
    }

    // Header block of a switch structure?
    var switchData = context.GetAnalysisData<ControlFlowAnalyzer.SwitchStructure>(address, "SwitchStructure");
    if (switchData is not null && switchData.HeaderBlock.Address == address)
    {
        GenerateSwitchCode(function, switchData, result, indentLevel, processedBlocks);
        return;
    }

    // Header of a loop? Only the first loop registered for this block is considered
    if (context.LoopsByBlockAddress.TryGetValue(address, out var loops) &&
        loops is { Count: > 0 } &&
        loops[0].Header.Address == address)
    {
        GenerateLoopCode(function, loops[0], result, indentLevel, processedBlocks);
        return;
    }

    // Not the head of any recognised structure
    GenerateRegularBlockCode(function, block, result, indentLevel, processedBlocks);
}
|
||||
|
||||
/// <summary>
/// Emits the instructions of a plain basic block and then continues with its successors
/// </summary>
/// <param name="function">The function containing the block</param>
/// <param name="block">The block to generate code for</param>
/// <param name="result">The string builder to append to</param>
/// <param name="indentLevel">The current indentation level</param>
/// <param name="processedBlocks">Set of blocks that have already been processed</param>
private void GenerateRegularBlockCode(Function function, InstructionBlock block, StringBuilder result, int indentLevel, HashSet<ulong> processedBlocks)
{
    var indent = new string(' ', indentLevel * 4);

    // Label the output with the address of the block it came from
    result.AppendLine($"{indent}// Block at 0x{block.Address:X8}");

    foreach (var instruction in block.Instructions)
    {
        // Frame setup/teardown instructions are not shown in the pseudocode
        if (!IsPrologueOrEpilogueInstruction(instruction))
        {
            var line = GenerateInstructionPseudocode(function, instruction);
            if (!string.IsNullOrEmpty(line))
            {
                result.AppendLine($"{indent}{line};");
            }
        }
    }

    // The filter is evaluated lazily, so blocks emitted by an earlier successor are skipped
    foreach (var next in block.Successors.Where(s => !processedBlocks.Contains(s.Address)))
    {
        GenerateBlockCode(function, next, result, indentLevel, processedBlocks);
    }
}
|
||||
|
||||
/// <summary>
/// Emits an if statement for an if-else structure, with an else branch when one
/// exists and has not been emitted yet
/// </summary>
/// <param name="function">The function containing the structure</param>
/// <param name="ifElseStructure">The if-else structure to generate code for</param>
/// <param name="result">The string builder to append to</param>
/// <param name="indentLevel">The current indentation level</param>
/// <param name="processedBlocks">Set of blocks that have already been processed</param>
private void GenerateIfElseCode(Function function, ControlFlowAnalyzer.IfElseStructure ifElseStructure, StringBuilder result, int indentLevel, HashSet<ulong> processedBlocks)
{
    var conditionBlock = ifElseStructure.ConditionBlock;

    // The condition block is consumed by the if statement itself
    processedBlocks.Add(conditionBlock.Address);

    var condition = GenerateConditionExpression(function, conditionBlock);
    var indent = new string(' ', indentLevel * 4);

    // if (...) { then-branch }
    result.AppendLine($"{indent}// If-else structure at 0x{conditionBlock.Address:X8}");
    result.AppendLine($"{indent}if ({condition})");
    result.AppendLine($"{indent}{{");
    result.AppendLine($"{indent} // Then branch at 0x{ifElseStructure.ThenBlock.Address:X8}");

    GenerateBlockCode(function, ifElseStructure.ThenBlock, result, indentLevel + 1, processedBlocks);

    result.AppendLine($"{indent}}}");

    // The else branch is optional and may already have been emitted via the then branch
    var elseBlock = ifElseStructure.ElseBlock;
    if (elseBlock is null || processedBlocks.Contains(elseBlock.Address))
    {
        return;
    }

    // else { else-branch }
    result.AppendLine($"{indent}else");
    result.AppendLine($"{indent}{{");
    result.AppendLine($"{indent} // Else branch at 0x{elseBlock.Address:X8}");

    GenerateBlockCode(function, elseBlock, result, indentLevel + 1, processedBlocks);

    result.AppendLine($"{indent}}}");
}
|
||||
|
||||
/// <summary>
/// Emits a switch statement with one case per entry of the switch structure
/// followed by an empty default case
/// </summary>
/// <param name="function">The function containing the structure</param>
/// <param name="switchStructure">The switch structure to generate code for</param>
/// <param name="result">The string builder to append to</param>
/// <param name="indentLevel">The current indentation level</param>
/// <param name="processedBlocks">Set of blocks that have already been processed</param>
private void GenerateSwitchCode(Function function, ControlFlowAnalyzer.SwitchStructure switchStructure, StringBuilder result, int indentLevel, HashSet<ulong> processedBlocks)
{
    // The switched-on expression is not recovered yet
    const string switchExpr = "/* switch expression */";

    var header = switchStructure.HeaderBlock;

    // The header block is consumed by the switch statement itself
    processedBlocks.Add(header.Address);

    var indent = new string(' ', indentLevel * 4);

    // switch (...) {
    result.AppendLine($"{indent}// Switch structure at 0x{header.Address:X8}");
    result.AppendLine($"{indent}switch ({switchExpr})");
    result.AppendLine($"{indent}{{");
    result.AppendLine();

    foreach (var entry in switchStructure.Cases)
    {
        // case N: <block> break;
        result.AppendLine($"{indent} case {entry.Value}:");
        result.AppendLine($"{indent} // Case block at 0x{entry.CaseBlock.Address:X8}");

        GenerateBlockCode(function, entry.CaseBlock, result, indentLevel + 2, processedBlocks);

        result.AppendLine($"{indent} break;");
        result.AppendLine();
    }

    // An empty default case is always emitted
    result.AppendLine($"{indent} default:");
    result.AppendLine($"{indent} // Default case");
    result.AppendLine($"{indent} break;");

    // }
    result.AppendLine($"{indent}}}");
}
|
||||
|
||||
/// <summary>
/// Generates code for a loop structure as a <c>while (true)</c> statement whose body
/// starts with the loop header block
/// </summary>
/// <param name="function">The function containing the structure</param>
/// <param name="loop">The loop to generate code for</param>
/// <param name="result">The string builder to append to</param>
/// <param name="indentLevel">The current indentation level</param>
/// <param name="processedBlocks">Set of blocks that have already been processed</param>
private void GenerateLoopCode(Function function, AnalyzerContext.Loop loop, StringBuilder result, int indentLevel, HashSet<ulong> processedBlocks)
{
    // Mark the header block as processed (a no-op when the caller has already done so)
    processedBlocks.Add(loop.Header.Address);

    // Add the loop header
    string indent = new string(' ', indentLevel * 4);
    result.AppendLine($"{indent}// Loop at 0x{loop.Header.Address:X8}")
        .AppendLine($"{indent}while (true) // Simplified loop condition");

    // Add the loop body
    result.AppendLine($"{indent}{{")
        .AppendLine($"{indent} // Loop body");

    // The header is already in processedBlocks at this point, so routing it through
    // GenerateBlockCode would return immediately and leave the loop body empty.
    // Emit it as a regular block instead: that writes the header's instructions and
    // then follows its successors, while the back edge to the header is skipped
    // because the header is marked as processed.
    GenerateRegularBlockCode(function, loop.Header, result, indentLevel + 1, processedBlocks);

    // Close the loop body
    result.AppendLine($"{indent}}}");
}
|
||||
|
||||
/// <summary>
/// Produces the condition text of an if statement
/// </summary>
/// <remarks>
/// The condition is not derived from the instructions yet; a fixed placeholder is returned
/// and both arguments are currently unused.
/// </remarks>
/// <param name="function">The function containing the block</param>
/// <param name="conditionBlock">The block containing the condition</param>
/// <returns>A string representing the condition expression</returns>
private string GenerateConditionExpression(Function function, InstructionBlock conditionBlock)
    => "/* condition */";
|
||||
|
||||
/// <summary>
/// Produces the pseudocode text for one instruction
/// </summary>
/// <remarks>
/// Instructions are not translated yet: the instruction's string form is wrapped
/// in a C comment and <paramref name="function"/> is currently unused.
/// </remarks>
/// <param name="function">The function containing the instruction</param>
/// <param name="instruction">The instruction to generate pseudocode for</param>
/// <returns>The generated pseudocode</returns>
private string GenerateInstructionPseudocode(Function function, Instruction instruction)
    => $"/* {instruction} */";
|
||||
|
||||
/// <summary>
/// Tells whether an instruction looks like stack frame setup or teardown:
/// <c>push ebp</c>, <c>mov ebp, esp</c>, <c>sub esp, X</c>, <c>pop ebp</c> or <c>ret</c>
/// </summary>
/// <param name="instruction">The instruction to check</param>
/// <returns>True if the instruction is part of the prologue or epilogue, false otherwise</returns>
private bool IsPrologueOrEpilogueInstruction(Instruction instruction)
{
    var operands = instruction.StructuredOperands;

    return instruction.Type switch
    {
        // push ebp / pop ebp
        InstructionType.Push or InstructionType.Pop =>
            operands.Count > 0 &&
            operands[0] is RegisterOperand { Register: RegisterIndex.Bp },

        // mov ebp, esp
        InstructionType.Mov =>
            operands.Count > 1 &&
            operands[0] is RegisterOperand { Register: RegisterIndex.Bp } &&
            operands[1] is RegisterOperand { Register: RegisterIndex.Sp },

        // sub esp, X (the second operand is not inspected)
        InstructionType.Sub =>
            operands.Count > 1 &&
            operands[0] is RegisterOperand { Register: RegisterIndex.Sp },

        // ret
        InstructionType.Ret => true,

        _ => false,
    };
}
|
||||
}
|
252
X86Disassembler/Analysers/VariableAnalyzer.cs
Normal file
252
X86Disassembler/Analysers/VariableAnalyzer.cs
Normal file
@ -0,0 +1,252 @@
|
||||
using X86Disassembler.Analysers.DecompilerTypes;
|
||||
using X86Disassembler.X86;
|
||||
using X86Disassembler.X86.Operands;
|
||||
|
||||
namespace X86Disassembler.Analysers;
|
||||
|
||||
/// <summary>
|
||||
/// Analyzes disassembled code to identify and track variables
|
||||
/// </summary>
|
||||
public class VariableAnalyzer
|
||||
{
|
||||
/// <summary>
|
||||
/// The analyzer context
|
||||
/// </summary>
|
||||
private readonly AnalyzerContext _context;
|
||||
|
||||
/// <summary>
/// Initializes the analyzer with the context it reads analysis data from
/// </summary>
/// <param name="context">The analyzer context</param>
public VariableAnalyzer(AnalyzerContext context) => _context = context;
|
||||
|
||||
/// <summary>
/// Analyzes the function to identify stack variables
/// </summary>
/// <remarks>
/// Only functions that set up an EBP-based frame (<c>push ebp</c> and <c>mov ebp, esp</c>)
/// are analyzed. For any other function the method returns without changing it, which
/// also means the register variable analysis is not run.
/// Accesses at <c>[ebp+8]</c> and above create parameters, accesses below <c>[ebp]</c>
/// create local variables.
/// </remarks>
/// <param name="function">The function to analyze</param>
public void AnalyzeStackVariables(Function function)
{
    // [ebp+0] holds the saved EBP and [ebp+4] the return address,
    // so the first parameter lives at [ebp+8]
    const int firstParameterOffset = 8;

    // Assume 4-byte (32-bit) stack slots
    const int slotSize = 4;

    // Displacements at or beyond this distance from EBP are ignored as implausible
    const int maxFrameOffset = 1000;

    // If we didn't find a standard prologue, we can't reliably analyze stack variables
    if (!HasFramePointerPrologue(function))
    {
        return;
    }

    // Local variables found so far, keyed by their (negative) EBP offset
    var stackOffsets = new Dictionary<int, Variable>();

    // Scan for memory accesses relative to EBP
    foreach (var block in function.AsmFunction.Blocks)
    {
        foreach (var instruction in block.Instructions)
        {
            foreach (var operand in instruction.StructuredOperands)
            {
                // Only [ebp+X] / [ebp-X] operands are of interest
                if (operand is not DisplacementMemoryOperand memOp ||
                    memOp.BaseRegister != RegisterIndex.Bp)
                {
                    continue;
                }

                int offset = (int)memOp.Displacement;

                // Offsets 1..7 fall inside the saved EBP / return address slots.
                // They must not be treated as parameters: integer division truncates
                // toward zero, so (offset - 8) / 4 would map 5..7 to parameter 0.
                if (offset >= firstParameterOffset && offset < maxFrameOffset)
                {
                    int paramIndex = (offset - firstParameterOffset) / slotSize;

                    // Make sure every parameter up to and including this one exists
                    while (function.Parameters.Count <= paramIndex)
                    {
                        int index = function.Parameters.Count;
                        var param = new Variable($"param_{index + 1}", DataType.Unknown)
                        {
                            Storage = Variable.StorageType.Parameter,
                            StackOffset = firstParameterOffset + (index * slotSize),
                            IsParameter = true,
                            ParameterIndex = index,
                            Size = slotSize
                        };
                        function.Parameters.Add(param);
                    }
                }
                else if (offset < 0 && offset > -maxFrameOffset)
                {
                    // Negative offset = local variable; create it on first sight
                    if (!stackOffsets.TryGetValue(offset, out var variable))
                    {
                        variable = new Variable($"local_{Math.Abs(offset)}", DataType.Unknown)
                        {
                            Storage = Variable.StorageType.Stack,
                            StackOffset = offset,
                            Size = slotSize
                        };

                        stackOffsets[offset] = variable;
                        function.LocalVariables.Add(variable);
                    }

                    // Track the usage of this variable
                    TrackVariableUsage(variable, instruction);
                }
            }
        }
    }

    // Analyze register-based variables
    AnalyzeRegisterVariables(function);
}

/// <summary>
/// Checks whether the function contains both <c>push ebp</c> and <c>mov ebp, esp</c>
/// </summary>
/// <remarks>
/// Blocks are scanned in order. Within a block, scanning stops at the first
/// <c>sub esp, imm</c>, so instructions after it in that block are not considered.
/// </remarks>
/// <param name="function">The function to inspect</param>
/// <returns>True when both prologue instructions were found, false otherwise</returns>
private static bool HasFramePointerPrologue(Function function)
{
    bool hasPushEbp = false;
    bool hasMovEbpEsp = false;

    foreach (var block in function.AsmFunction.Blocks)
    {
        foreach (var instruction in block.Instructions)
        {
            var operands = instruction.StructuredOperands;

            if (instruction.Type == InstructionType.Push &&
                operands.Count > 0 &&
                operands[0] is RegisterOperand pushedReg &&
                pushedReg.Register == RegisterIndex.Bp)
            {
                // push ebp
                hasPushEbp = true;
            }
            else if (instruction.Type == InstructionType.Mov &&
                     operands.Count > 1 &&
                     operands[0] is RegisterOperand destReg &&
                     operands[1] is RegisterOperand srcReg &&
                     destReg.Register == RegisterIndex.Bp &&
                     srcReg.Register == RegisterIndex.Sp)
            {
                // mov ebp, esp
                hasMovEbpEsp = true;
            }
            else if (instruction.Type == InstructionType.Sub &&
                     operands.Count > 1 &&
                     operands[0] is RegisterOperand subReg &&
                     operands[1] is ImmediateOperand &&
                     subReg.Register == RegisterIndex.Sp)
            {
                // sub esp, X reserves the local area; stop scanning this block
                break;
            }

            // Both halves of the prologue seen: no need to look any further
            if (hasPushEbp && hasMovEbpEsp)
            {
                return true;
            }
        }
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Creates a register variable for every register (other than ESP and EBP) that the
/// data flow results report as holding a constant in at least one block
/// </summary>
/// <param name="function">The function to analyze</param>
private void AnalyzeRegisterVariables(Function function)
{
    // Key under which the per-block register values are stored in the context
    const string registerValuesKey = "RegisterValues";

    // Registers that already received a variable during this run
    var seenRegisters = new HashSet<RegisterIndex>();

    foreach (var block in function.AsmFunction.Blocks)
    {
        var registerValues = _context.GetAnalysisData<Dictionary<RegisterIndex, DataFlowAnalyzer.ValueInfo>>(block.Address, registerValuesKey);

        // No data flow results recorded for this block
        if (registerValues is null)
        {
            continue;
        }

        foreach (var (register, valueInfo) in registerValues)
        {
            // ESP and EBP manage the stack frame and are never reported as variables
            if (register == RegisterIndex.Sp || register == RegisterIndex.Bp)
            {
                continue;
            }

            // Only registers holding a constant value are considered
            if (valueInfo.Type != DataFlowAnalyzer.ValueInfo.ValueType.Constant)
            {
                continue;
            }

            // One variable per register, no matter how many blocks mention it
            if (!seenRegisters.Add(register))
            {
                continue;
            }

            function.RegisterVariables.Add(
                new Variable($"reg_{RegisterMapper.GetRegisterName(register, 32)}", DataType.Unknown)
                {
                    Storage = Variable.StorageType.Register,
                    Register = register,
                    Size = 4 // Assume 4 bytes (32-bit)
                });
        }
    }
}
|
||||
|
||||
/// <summary>
/// Guesses the type of a variable from the instruction that uses it
/// </summary>
/// <remarks>
/// A type is only assigned while the variable's type is still unknown.
/// A cmp/test whose second operand is the immediate 0 yields int; fld/fst/fstp yield float.
/// </remarks>
/// <param name="variable">The variable to track</param>
/// <param name="instruction">The instruction using the variable</param>
private void TrackVariableUsage(Variable variable, Instruction instruction)
{
    // A type that is already known is never overwritten here
    if (variable.Type != DataType.Unknown)
    {
        return;
    }

    switch (instruction.Type)
    {
        // Comparison against 0 looks like a boolean check; there is no bool type yet, so use int
        case InstructionType.Cmp:
        case InstructionType.Test:
            var operands = instruction.StructuredOperands;
            if (operands.Count > 1 && operands[1] is ImmediateOperand imm && imm.Value == 0)
            {
                variable.Type = DataType.Int;
            }

            break;

        // Floating-point load/store instructions suggest a float.
        // String instructions are not detected: there are no instruction types for them yet.
        case InstructionType.Fld:
        case InstructionType.Fst:
        case InstructionType.Fstp:
            variable.Type = DataType.Float;
            break;
    }
}
|
||||
}
|
Reference in New Issue
Block a user