0
mirror of https://github.com/sampletext32/ParkanPlayground.git synced 2025-07-01 20:40:27 +03:00
Files
ParkanPlayground/X86Disassembler/Analysers/BlockDisassembler.cs

333 lines
14 KiB
C#
Raw Normal View History

using X86Disassembler.X86;
2025-04-18 16:29:53 +03:00
namespace X86Disassembler.Analysers;
/// <summary>
/// Disassembles code into basic blocks by following control flow instructions.
/// A basic block is a sequence of instructions with a single entry point (the first instruction)
/// and a single exit point (the last instruction, typically a jump or return).
/// </summary>
public class BlockDisassembler
{
    // The buffer containing the code to disassemble
    private readonly byte[] _codeBuffer;

    // The length of the buffer
    private readonly int _length;

    // The base address of the code; subtracted from an RVA to obtain an offset into _codeBuffer
    private readonly ulong _baseAddress;

    /// <summary>
    /// Initializes a new instance of the BlockDisassembler class
    /// </summary>
    /// <param name="codeBuffer">The raw code bytes to be disassembled</param>
    /// <param name="baseAddress">The base RVA (Relative Virtual Address) of the code section</param>
    /// <exception cref="ArgumentNullException"><paramref name="codeBuffer"/> is null</exception>
    public BlockDisassembler(byte[] codeBuffer, ulong baseAddress)
    {
        // Fail with a descriptive exception instead of a NullReferenceException on .Length
        ArgumentNullException.ThrowIfNull(codeBuffer);

        _codeBuffer = codeBuffer;
        _length = codeBuffer.Length;
        _baseAddress = baseAddress;
    }

    /// <summary>
    /// Disassembles code starting from the specified RVA address by following control flow.
    /// Creates blocks of instructions separated by jumps, branches, and returns, and links each
    /// block to the blocks discovered at its jump target and fall-through addresses.
    /// </summary>
    /// <param name="rvaAddress">The RVA (Relative Virtual Address) to start disassembly from</param>
    /// <returns>
    /// An <see cref="AsmFunction"/> whose address is <paramref name="rvaAddress"/> and whose blocks are
    /// sorted by address, with block addresses expressed as RVAs
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="rvaAddress"/> is below the base address or maps past the end of the code buffer
    /// </exception>
    /// <exception cref="InvalidOperationException">The decoder failed to decode an instruction</exception>
    public AsmFunction DisassembleFromAddress(uint rvaAddress)
    {
        // The subtraction below is unsigned: without this guard an RVA below the base address
        // would silently wrap around to a huge offset
        if (rvaAddress < _baseAddress)
        {
            throw new ArgumentOutOfRangeException(
                nameof(rvaAddress),
                rvaAddress,
                $"RVA 0x{rvaAddress:X8} is below the base address 0x{_baseAddress:X8}");
        }

        // Calculate the file offset from the RVA by subtracting the base address.
        // Offsets are used while decoding; blocks are converted back to RVA at the end
        ulong fileOffset = rvaAddress - _baseAddress;
        if (fileOffset >= (ulong) _length)
        {
            throw new ArgumentOutOfRangeException(
                nameof(rvaAddress),
                rvaAddress,
                $"RVA 0x{rvaAddress:X8} maps to offset 0x{fileOffset:X8}, which is outside the {_length}-byte code buffer");
        }

        // Create instruction decoder for parsing the code buffer
        InstructionDecoder decoder = new InstructionDecoder(_codeBuffer, _length);

        // Track visited addresses to prevent infinite loops
        HashSet<ulong> visitedAddresses = [];

        // Queue of addresses to process (breadth-first approach)
        Queue<ulong> addressQueue = [];
        addressQueue.Enqueue(fileOffset);

        // Keep track of the original entry point RVA for the function
        ulong entryPointRVA = rvaAddress;

        // List to store discovered basic blocks
        List<InstructionBlock> blocks = [];

        // Dictionary to track blocks by (file offset) start address for quick lookup
        Dictionary<ulong, InstructionBlock> blocksByAddress = new Dictionary<ulong, InstructionBlock>();

        // Control flow edges whose target block may not have been discovered yet.
        // They are resolved once every queued address has been processed
        List<(InstructionBlock Source, ulong TargetOffset)> pendingEdges = [];

        while (addressQueue.Count > 0)
        {
            // Get the next address to process
            var address = addressQueue.Dequeue();

            // Skip if we've already visited this address
            if (!visitedAddresses.Add(address))
            {
                Console.WriteLine($"Already visited address {address}");
                continue;
            }

            // Position the decoder at the current address
            decoder.SetPosition((int) address);

            // Collect instructions for this block
            List<Instruction> instructions = [];

            // Process instructions until we hit a control flow change
            while (true)
            {
                // Get the current position
                ulong currentPosition = (ulong) decoder.GetPosition();

                // If we've stepped onto an existing block, create a new block up to this point
                // and stop processing this path (to avoid duplicating instructions)
                if (currentPosition != address && blocksByAddress.TryGetValue(currentPosition, out var targetBlock))
                {
                    Console.WriteLine("Stepped on to existing block. Creating in the middle");

                    var newBlock = RegisterBlock(blocks, address, instructions, null, false, false);
                    blocksByAddress[address] = newBlock;

                    // Execution falls straight through into the already-known block
                    LinkBlocks(newBlock, targetBlock);

                    break;
                }

                // Decode the next instruction
                var instruction = decoder.DecodeInstruction();

                // Handle decoding failures; report where decoding actually failed, not just
                // where the enclosing block started
                if (instruction is null)
                {
                    throw new InvalidOperationException(
                        $"Unexpectedly failed to decode instruction at offset 0x{currentPosition:X8} (block starting at offset 0x{address:X8})");
                }

                // Add the instruction to the current block
                instructions.Add(instruction);

                // Check for conditional jump (e.g., JZ, JNZ, JLE)
                // For conditional jumps, we need to follow both the jump target and the fall-through path
                if (instruction.Type.IsConditionalJump())
                {
                    // NOTE(review): the operand value is queued alongside buffer offsets, so it is
                    // assumed to be an offset into the code buffer rather than an RVA - confirm
                    uint jumpTargetAddress = instruction.StructuredOperands[0].GetValue();

                    // Get the fall-through address (next instruction after this jump)
                    uint fallThroughAddress = (uint) decoder.GetPosition();

                    // Register this block (it ends with a conditional jump)
                    var newBlock = RegisterBlock(blocks, address, instructions, null, false, false);
                    blocksByAddress[address] = newBlock;

                    // Both outgoing edges are linked after discovery completes
                    pendingEdges.Add((newBlock, jumpTargetAddress));
                    pendingEdges.Add((newBlock, fallThroughAddress));

                    // Queue the jump target and the fall-through address for processing
                    addressQueue.Enqueue(jumpTargetAddress);
                    addressQueue.Enqueue(fallThroughAddress);

                    break;
                }

                // Check for unconditional jump (e.g., JMP)
                // For unconditional jumps, we only follow the jump target
                if (instruction.Type.IsRegularJump())
                {
                    // NOTE(review): assumed to be an offset into the code buffer - confirm
                    uint jumpTargetAddress = instruction.StructuredOperands[0].GetValue();

                    // Register this block (it ends with an unconditional jump)
                    var newBlock = RegisterBlock(blocks, address, instructions, null, false, false);
                    blocksByAddress[address] = newBlock;

                    // The single outgoing edge is linked after discovery completes
                    pendingEdges.Add((newBlock, jumpTargetAddress));

                    // Queue the jump target address for processing
                    addressQueue.Enqueue(jumpTargetAddress);

                    break;
                }

                // Check for return instruction (e.g., RET, RETF)
                // Returns end a block without any successors
                if (instruction.Type.IsRet())
                {
                    var newBlock = RegisterBlock(blocks, address, instructions, null, false, false);
                    blocksByAddress[address] = newBlock;

                    break;
                }
            }
        }

        // Resolve the recorded edges now that every reachable block has been created.
        // blocksByAddress is keyed by file offset, so this must run before the RVA conversion below
        foreach (var (source, targetOffset) in pendingEdges)
        {
            if (blocksByAddress.TryGetValue(targetOffset, out var target))
            {
                LinkBlocks(source, target);
            }
        }

        // Since blocks aren't necessarily ordered (ASM can jump anywhere it likes)
        // we need to sort the blocks ourselves
        blocks.Sort((b1, b2) => b1.Address.CompareTo(b2.Address));

        // Convert all block addresses from file offsets to RVA
        foreach (var block in blocks)
        {
            // Convert from file offset to RVA by adding the base address
            ulong rvaBlockAddress = block.Address + _baseAddress;
            Console.WriteLine($"Converting block address from file offset 0x{block.Address:X8} to RVA 0x{rvaBlockAddress:X8}");
            block.Address = rvaBlockAddress;
        }

        // Create a new AsmFunction with the RVA address
        var asmFunction = new AsmFunction()
        {
            Address = entryPointRVA,
            Blocks = blocks,
        };

        // Verify that the entry block exists (diagnostic output only)
        var entryBlock = asmFunction.EntryBlock;
        if (entryBlock == null)
        {
            Console.WriteLine($"Warning: No entry block found at RVA 0x{entryPointRVA:X8}");

            // Try to find a block at the file offset address (for backward compatibility)
            var fallbackBlock = blocks.FirstOrDefault(b => b.Address == (fileOffset + _baseAddress));
            if (fallbackBlock != null)
            {
                Console.WriteLine($"Found fallback entry block at RVA 0x{fallbackBlock.Address:X8}");
            }
        }
        else
        {
            Console.WriteLine($"Found entry block at RVA 0x{entryBlock.Address:X8}");
        }

        return asmFunction;
    }

    /// <summary>
    /// Creates and registers a new instruction block in the blocks collection.
    /// If a block with the same address is already in the collection, that block is returned
    /// instead and no new block is created.
    /// </summary>
    /// <param name="blocks">The list of blocks to add to</param>
    /// <param name="address">The starting address of the block</param>
    /// <param name="instructions">The instructions contained in the block</param>
    /// <param name="currentBlock">
    /// A block to link as predecessor of the returned block (null to create no relationship)
    /// </param>
    /// <param name="isJumpTarget">Whether this block is a jump target (currently not used)</param>
    /// <param name="isFallThrough">Whether this block is a fall-through from another block (currently not used)</param>
    /// <returns>The newly created block, or the existing block at <paramref name="address"/></returns>
    public InstructionBlock RegisterBlock(
        List<InstructionBlock> blocks,
        ulong address,
        List<Instruction> instructions,
        InstructionBlock? currentBlock = null,
        bool isJumpTarget = false,
        bool isFallThrough = false)
    {
        // Check if a block already exists at this address
        var existingBlock = blocks.FirstOrDefault(b => b.Address == address);
        if (existingBlock != null)
        {
            // Only the relationships are updated; the passed instructions are ignored
            if (currentBlock != null)
            {
                LinkBlocks(currentBlock, existingBlock);
            }

            return existingBlock;
        }

        // Create a new block with the provided address and instructions
        var block = new InstructionBlock()
        {
            Address = address,
            Instructions = instructions
        };

        // Add the block to the collection
        blocks.Add(block);

        // If the current block is not null, update the relationships
        if (currentBlock != null)
        {
            LinkBlocks(currentBlock, block);
        }

        // Log the created block for debugging
        Console.WriteLine($"Created block:\n{block}");

        return block;
    }

    /// <summary>
    /// Records a control flow edge between two blocks, skipping links that are already present
    /// </summary>
    /// <param name="from">The block that transfers control</param>
    /// <param name="to">The block that receives control</param>
    private static void LinkBlocks(InstructionBlock from, InstructionBlock to)
    {
        if (!from.Successors.Contains(to))
        {
            from.Successors.Add(to);
        }

        if (!to.Predecessors.Contains(from))
        {
            to.Predecessors.Add(from);
        }
    }
}
/// <summary>
/// A basic block of instructions together with its links to neighbouring blocks
/// </summary>
public class InstructionBlock
{
    /// <summary>
    /// Address at which the block begins
    /// </summary>
    public ulong Address { get; set; }

    /// <summary>
    /// Instructions that make up the block, in order
    /// </summary>
    public List<Instruction> Instructions { get; set; } = [];

    /// <summary>
    /// Blocks from which control can arrive at this block
    /// </summary>
    public List<InstructionBlock> Predecessors { get; set; } = [];

    /// <summary>
    /// Blocks to which control can pass from this block
    /// </summary>
    public List<InstructionBlock> Successors { get; set; } = [];

    /// <summary>
    /// Builds a multi-line description: the block address, its predecessors, its successors,
    /// and then one line per instruction
    /// </summary>
    public override string ToString()
    {
        // Renders one relationship line, or the placeholder text when the list is empty
        static string DescribeLinks(string label, string emptyText, List<InstructionBlock> linked)
        {
            if (linked.Count == 0)
            {
                return emptyText;
            }

            var addresses = linked.Select(b => $"0x{b.Address:X8}");
            return $"{label}: {string.Join(", ", addresses)}";
        }

        string[] sections =
        [
            $"Address: 0x{Address:X8}",
            DescribeLinks("Predecessors", "No predecessors", Predecessors),
            DescribeLinks("Successors", "No successors", Successors),
            string.Join("\n", Instructions),
        ];

        return string.Join("\n", sections);
    }
}