2025-04-18 21:34:35 +03:00
|
|
|
using X86Disassembler.X86;
|
2025-04-18 16:29:53 +03:00
|
|
|
|
|
|
|
namespace X86Disassembler.Analysers;
|
|
|
|
|
|
|
|
/// <summary>
/// Disassembles code into basic blocks by following control flow instructions.
/// A basic block is a sequence of instructions with a single entry point (the first instruction)
/// and a single exit point (the last instruction, typically a jump or return).
/// </summary>
public class BlockDisassembler
{
    // The buffer containing the code to disassemble
    private readonly byte[] _codeBuffer;

    // The length of the buffer
    private readonly int _length;

    // The base address of the code; subtracted from RVAs to obtain offsets into _codeBuffer
    private readonly ulong _baseAddress;

    /// <summary>
    /// Initializes a new instance of the BlockDisassembler class
    /// </summary>
    /// <param name="codeBuffer">The raw code bytes to be disassembled</param>
    /// <param name="baseAddress">The base RVA (Relative Virtual Address) of the code section</param>
    /// <exception cref="ArgumentNullException"><paramref name="codeBuffer"/> is null</exception>
    public BlockDisassembler(byte[] codeBuffer, ulong baseAddress)
    {
        ArgumentNullException.ThrowIfNull(codeBuffer);

        _codeBuffer = codeBuffer;
        _length = codeBuffer.Length;
        _baseAddress = baseAddress;
    }

    /// <summary>
    /// Disassembles code starting from the specified RVA address by following control flow.
    /// Creates blocks of instructions separated by jumps, branches, and returns, and records the
    /// predecessor/successor relationships between the discovered blocks.
    /// </summary>
    /// <param name="rvaAddress">The RVA (Relative Virtual Address) to start disassembly from</param>
    /// <returns>
    /// An <see cref="AsmFunction"/> whose address is <paramref name="rvaAddress"/> and whose blocks are
    /// sorted by address, with every block address expressed as an RVA
    /// </returns>
    /// <exception cref="InvalidOperationException">An instruction could not be decoded</exception>
    public AsmFunction DisassembleFromAddress(uint rvaAddress)
    {
        // Create instruction decoder for parsing the code buffer
        InstructionDecoder decoder = new InstructionDecoder(_codeBuffer, _length);

        // Track visited addresses to prevent infinite loops
        HashSet<ulong> visitedAddresses = [];

        // Queue of addresses to process (breadth-first approach)
        Queue<ulong> addressQueue = [];

        // Calculate the file offset from the RVA by subtracting the base address.
        // All bookkeeping below is done in file offsets; blocks are converted back to RVA at the end.
        // NOTE(review): an rvaAddress below _baseAddress wraps around here — confirm callers never pass one.
        ulong fileOffset = rvaAddress - _baseAddress;
        addressQueue.Enqueue(fileOffset);

        // Keep track of the original entry point RVA for the function
        ulong entryPointRVA = rvaAddress;

        // List to store discovered basic blocks
        List<InstructionBlock> blocks = [];

        // Dictionary to track blocks by (file offset) address for quick lookup
        Dictionary<ulong, InstructionBlock> blocksByAddress = new Dictionary<ulong, InstructionBlock>();

        // Control-flow edges whose target block may not exist yet. A block is only created when its
        // start address is dequeued, so edges are recorded here and linked once discovery is complete.
        List<(InstructionBlock Source, ulong TargetOffset)> pendingEdges = [];

        // Records an edge from source to the block starting at targetOffset and schedules that
        // address for disassembly.
        void QueueSuccessor(InstructionBlock source, ulong targetOffset)
        {
            pendingEdges.Add((source, targetOffset));
            addressQueue.Enqueue(targetOffset);
        }

        while (addressQueue.Count > 0)
        {
            // Get the next address to process
            var address = addressQueue.Dequeue();

            // Skip if we've already visited this address
            if (!visitedAddresses.Add(address))
            {
                Console.WriteLine($"Already visited address {address}");
                continue;
            }

            // Position the decoder at the current address
            decoder.SetPosition((int) address);

            // Collect instructions for this block
            List<Instruction> instructions = [];

            // Process instructions until we hit a control flow change
            while (true)
            {
                // Get the current position
                ulong currentPosition = (ulong)decoder.GetPosition();

                // If we've stepped onto an existing block, create a new block up to this point
                // and stop processing this path (to avoid duplicating instructions)
                if (blocksByAddress.TryGetValue(currentPosition, out var targetBlock) && currentPosition != address)
                {
                    Console.WriteLine("Stepped on to existing block. Creating in the middle");

                    // Register this block; it falls straight through into the existing block
                    var newBlock = RegisterBlock(blocks, address, instructions, null, false, false);
                    blocksByAddress[address] = newBlock;
                    LinkBlocks(newBlock, targetBlock);

                    break;
                }

                // Decode the next instruction
                var instruction = decoder.DecodeInstruction();

                // Handle decoding failures, reporting where decoding actually stopped
                if (instruction is null)
                {
                    throw new InvalidOperationException(
                        $"Unexpectedly failed to decode instruction at {currentPosition} (block started at {address})");
                }

                // Add the instruction to the current block
                instructions.Add(instruction);

                // Check for conditional jump (e.g., JZ, JNZ, JLE)
                // For conditional jumps, we need to follow both the jump target and the fall-through path
                if (instruction.Type.IsConditionalJump())
                {
                    // Get the jump target address
                    // NOTE(review): the operand value is used directly as a decoder position — confirm
                    // it is a buffer offset rather than an RVA.
                    uint jumpTargetAddress = instruction.StructuredOperands[0].GetValue();

                    // Get the fall-through address (next instruction after this jump)
                    uint fallThroughAddress = (uint)decoder.GetPosition();

                    // Register this block (it ends with a conditional jump)
                    var newBlock = RegisterBlock(blocks, address, instructions, null, false, false);
                    blocksByAddress[address] = newBlock;

                    // Queue both outcomes: the jump target first, then the fall-through path
                    QueueSuccessor(newBlock, jumpTargetAddress);
                    QueueSuccessor(newBlock, fallThroughAddress);

                    break;
                }

                // Check for unconditional jump (e.g., JMP)
                // For unconditional jumps, we only follow the jump target
                if (instruction.Type.IsRegularJump())
                {
                    // Get the jump target address
                    uint jumpTargetAddress = instruction.StructuredOperands[0].GetValue();

                    // Register this block (it ends with an unconditional jump)
                    var newBlock = RegisterBlock(blocks, address, instructions, null, false, false);
                    blocksByAddress[address] = newBlock;

                    // Queue the jump target address for processing
                    QueueSuccessor(newBlock, jumpTargetAddress);

                    break;
                }

                // Check for return instruction (e.g., RET, RETF)
                // Returns end a block without any successors
                if (instruction.Type.IsRet())
                {
                    // Register this block (it ends with a return)
                    var newBlock = RegisterBlock(blocks, address, instructions, null, false, false);
                    blocksByAddress[address] = newBlock;

                    break;
                }
            }
        }

        // Every queued address has now been processed, so the recorded edges can be resolved.
        // This must happen before the RVA conversion below because blocksByAddress is keyed by file offset.
        foreach (var (source, targetOffset) in pendingEdges)
        {
            if (blocksByAddress.TryGetValue(targetOffset, out var successor))
            {
                LinkBlocks(source, successor);
            }
        }

        // Since blocks aren't necessarily ordered (ASM can jump anywhere it likes)
        // we need to sort the blocks ourselves
        blocks.Sort((b1, b2) => b1.Address.CompareTo(b2.Address));

        // Convert all block addresses from file offsets to RVA
        foreach (var block in blocks)
        {
            // Convert from file offset to RVA by adding the base address
            ulong rvaBlockAddress = block.Address + _baseAddress;
            Console.WriteLine($"Converting block address from file offset 0x{block.Address:X8} to RVA 0x{rvaBlockAddress:X8}");
            block.Address = rvaBlockAddress;
        }

        // Create a new AsmFunction with the RVA address
        var asmFunction = new AsmFunction()
        {
            Address = entryPointRVA,
            Blocks = blocks,
        };

        // Verify that the entry block exists
        var entryBlock = asmFunction.EntryBlock;
        if (entryBlock is null)
        {
            Console.WriteLine($"Warning: No entry block found at RVA 0x{entryPointRVA:X8}");

            // Try to find a block at the file offset address (for backward compatibility)
            var fallbackBlock = blocks.FirstOrDefault(b => b.Address == (fileOffset + _baseAddress));
            if (fallbackBlock is not null)
            {
                Console.WriteLine($"Found fallback entry block at RVA 0x{fallbackBlock.Address:X8}");
            }
        }
        else
        {
            Console.WriteLine($"Found entry block at RVA 0x{entryBlock.Address:X8}");
        }

        return asmFunction;
    }

    /// <summary>
    /// Creates and registers a new instruction block in the blocks collection, or returns the block
    /// already registered at the same address
    /// </summary>
    /// <param name="blocks">The list of blocks to add to</param>
    /// <param name="address">The starting address of the block</param>
    /// <param name="instructions">The instructions contained in the block (ignored when a block already exists at the address)</param>
    /// <param name="currentBlock">An optional block that transfers control to this block; when not null it is linked as a predecessor</param>
    /// <param name="isJumpTarget">Whether this block is a jump target (currently unused)</param>
    /// <param name="isFallThrough">Whether this block is a fall-through from another block (currently unused)</param>
    /// <returns>The newly created block, or the existing block found at <paramref name="address"/></returns>
    public InstructionBlock RegisterBlock(
        List<InstructionBlock> blocks,
        ulong address,
        List<Instruction> instructions,
        InstructionBlock? currentBlock = null,
        bool isJumpTarget = false,
        bool isFallThrough = false)
    {
        // Check if a block already exists at this address
        var existingBlock = blocks.FirstOrDefault(b => b.Address == address);

        if (existingBlock is not null)
        {
            // Only the relationships are updated; the existing block keeps its own instructions
            if (currentBlock is not null)
            {
                LinkBlocks(currentBlock, existingBlock);
            }

            return existingBlock;
        }

        // Create a new block with the provided address and instructions
        var block = new InstructionBlock()
        {
            Address = address,
            Instructions = instructions
        };

        // Add the block to the collection
        blocks.Add(block);

        // If the current block is not null, update the relationships
        if (currentBlock is not null)
        {
            LinkBlocks(currentBlock, block);
        }

        // Log the created block for debugging
        Console.WriteLine($"Created block:\n{block}");

        return block;
    }

    /// <summary>
    /// Records a control-flow edge between two blocks, skipping entries that are already present
    /// </summary>
    /// <param name="predecessor">The block that transfers control</param>
    /// <param name="successor">The block that receives control</param>
    private static void LinkBlocks(InstructionBlock predecessor, InstructionBlock successor)
    {
        if (!predecessor.Successors.Contains(successor))
        {
            predecessor.Successors.Add(successor);
        }

        if (!successor.Predecessors.Contains(predecessor))
        {
            successor.Predecessors.Add(predecessor);
        }
    }
}
|
|
|
|
|
|
|
|
/// <summary>
/// A basic block: a run of instructions together with the blocks that lead into it and out of it
/// </summary>
public class InstructionBlock
{
    /// <summary>
    /// Address at which the block begins
    /// </summary>
    public ulong Address { get; set; }

    /// <summary>
    /// Instructions that make up the block, in order
    /// </summary>
    public List<Instruction> Instructions { get; set; } = [];

    /// <summary>
    /// Blocks from which control can reach this block
    /// </summary>
    public List<InstructionBlock> Predecessors { get; set; } = [];

    /// <summary>
    /// Blocks to which this block can hand control
    /// </summary>
    public List<InstructionBlock> Successors { get; set; } = [];

    /// <summary>
    /// Builds a multi-line description: the block address, its predecessors, its successors,
    /// and then one line per instruction
    /// </summary>
    public override string ToString()
    {
        string incoming = DescribeLinks("Predecessors: ", "No predecessors", Predecessors);
        string outgoing = DescribeLinks("Successors: ", "No successors", Successors);
        string listing = string.Join("\n", Instructions);

        return $"Address: 0x{Address:X8}\n{incoming}\n{outgoing}\n{listing}";
    }

    /// <summary>
    /// Formats a list of linked blocks as a prefixed, comma-separated list of 8-digit hex addresses,
    /// or returns the placeholder text when the list is empty
    /// </summary>
    private static string DescribeLinks(string prefix, string emptyText, List<InstructionBlock> linked)
    {
        if (linked.Count == 0)
        {
            return emptyText;
        }

        IEnumerable<string> addresses = linked.Select(b => $"0x{b.Address:X8}");
        return prefix + string.Join(", ", addresses);
    }
}
|