using X86Disassembler.X86; namespace X86Disassembler.Analysers; /// /// Disassembles code into basic blocks by following control flow instructions. /// A basic block is a sequence of instructions with a single entry point (the first instruction) /// and a single exit point (the last instruction, typically a jump or return). /// public class BlockDisassembler { // The buffer containing the code to disassemble private readonly byte[] _codeBuffer; // The length of the buffer private readonly int _length; // The base address of the code private readonly ulong _baseAddress; /// /// Initializes a new instance of the BlockDisassembler class /// /// The raw code bytes to be disassembled /// The base RVA (Relative Virtual Address) of the code section public BlockDisassembler(byte[] codeBuffer, ulong baseAddress) { _codeBuffer = codeBuffer; _length = codeBuffer.Length; _baseAddress = baseAddress; } /// /// Disassembles code starting from the specified RVA address by following control flow. /// Creates blocks of instructions separated by jumps, branches, and returns. /// /// The RVA (Relative Virtual Address) to start disassembly from /// A list of instruction blocks representing the control flow of the code public AsmFunction DisassembleFromAddress(uint rvaAddress) { // Create instruction decoder for parsing the code buffer InstructionDecoder decoder = new InstructionDecoder(_codeBuffer, _length); // Track visited addresses to prevent infinite loops HashSet visitedAddresses = []; // Queue of addresses to process (breadth-first approach) Queue addressQueue = []; // Calculate the file offset from the RVA by subtracting the base address // Store the file offset for processing, but we'll convert back to RVA when creating blocks ulong fileOffset = rvaAddress - _baseAddress; addressQueue.Enqueue(fileOffset); // Keep track of the original entry point RVA for the function ulong entryPointRVA = rvaAddress; // List to store discovered basic blocks List blocks = []; // Dictionary to track blocks by address for quick lookup Dictionary blocksByAddress = new Dictionary(); while (addressQueue.Count > 0) { // Get the next address to process var address = addressQueue.Dequeue(); // Skip if we've already visited this address if (!visitedAddresses.Add(address)) { Console.WriteLine($"Already visited address {address}"); continue; } // Position the decoder at the current address decoder.SetPosition((int) address); // Collect instructions for this block List instructions = []; // Get the current block if it exists (for tracking predecessors) InstructionBlock? currentBlock = null; if (blocksByAddress.TryGetValue(address, out var existingBlock)) { currentBlock = existingBlock; } // Process instructions until we hit a control flow change while (true) { // Get the current position ulong currentPosition = (ulong)decoder.GetPosition(); // If we've stepped onto an existing block, create a new block up to this point // and stop processing this path (to avoid duplicating instructions) if (blocksByAddress.TryGetValue(currentPosition, out var targetBlock) && currentPosition != address) { Console.WriteLine("Stepped on to existing block. Creating in the middle"); // Register this block and establish the relationship with the target block var newBlock = RegisterBlock(blocks, address, instructions, null, false, false); blocksByAddress[address] = newBlock; // Add the target block as a successor to the new block newBlock.Successors.Add(targetBlock); // Add the new block as a predecessor to the target block targetBlock.Predecessors.Add(newBlock); break; } // Decode the next instruction var instruction = decoder.DecodeInstruction(); // Handle decoding failures if (instruction is null) { throw new InvalidOperationException($"Unexpectedly failed to decode instruction at {address}"); } // Add the instruction to the current block instructions.Add(instruction); // Check for conditional jump (e.g., JZ, JNZ, JLE) // For conditional jumps, we need to follow both the jump target and the fall-through path if (instruction.Type.IsConditionalJump()) { // Get the jump target address uint jumpTargetAddress = instruction.StructuredOperands[0].GetValue(); // Get the fall-through address (next instruction after this jump) uint fallThroughAddress = (uint)decoder.GetPosition(); // Register this block (it ends with a conditional jump) var newBlock = RegisterBlock(blocks, address, instructions, currentBlock, false, false); blocksByAddress[address] = newBlock; // Queue the jump target address for processing addressQueue.Enqueue(jumpTargetAddress); // Queue the fall-through address (next instruction after this jump) addressQueue.Enqueue(fallThroughAddress); break; } // Check for unconditional jump (e.g., JMP) // For unconditional jumps, we only follow the jump target if (instruction.Type.IsRegularJump()) { // Get the jump target address uint jumpTargetAddress = instruction.StructuredOperands[0].GetValue(); // Register this block (it ends with an unconditional jump) var newBlock = RegisterBlock(blocks, address, instructions, currentBlock, false, false); blocksByAddress[address] = newBlock; // Queue the jump target address for processing addressQueue.Enqueue(jumpTargetAddress); break; } // Check for return instruction (e.g., RET, RETF) // Returns end a block without any successors if (instruction.Type.IsRet()) { // Register this block (it ends with a return) var newBlock = RegisterBlock(blocks, address, instructions, currentBlock, false, false); blocksByAddress[address] = newBlock; break; } } } // Since blocks aren't necessarily ordered (ASM can jump anywhere it likes) // we need to sort the blocks ourselves blocks.Sort((b1, b2) => b1.Address.CompareTo(b2.Address)); // Convert all block addresses from file offsets to RVA foreach (var block in blocks) { // Convert from file offset to RVA by adding the base address ulong rvaBlockAddress = block.Address + _baseAddress; Console.WriteLine($"Converting block address from file offset 0x{block.Address:X8} to RVA 0x{rvaBlockAddress:X8}"); block.Address = rvaBlockAddress; } // Create a new AsmFunction with the RVA address var asmFunction = new AsmFunction() { Address = entryPointRVA, Blocks = blocks, }; // Verify that the entry block exists var entryBlock = asmFunction.EntryBlock; if (entryBlock == null) { Console.WriteLine($"Warning: No entry block found at RVA 0x{entryPointRVA:X8}"); // Try to find a block at the file offset address (for backward compatibility) var fallbackBlock = blocks.FirstOrDefault(b => b.Address == (fileOffset + _baseAddress)); if (fallbackBlock != null) { Console.WriteLine($"Found fallback entry block at RVA 0x{fallbackBlock.Address:X8}"); } } else { Console.WriteLine($"Found entry block at RVA 0x{entryBlock.Address:X8}"); } return asmFunction; } /// /// Creates and registers a new instruction block in the blocks collection /// /// The list of blocks to add to /// The starting address of the block /// The instructions contained in the block /// The current block being processed (null if this is the first block) /// Whether this block is a jump target /// Whether this block is a fall-through from another block /// The newly created block public InstructionBlock RegisterBlock( List blocks, ulong address, List instructions, InstructionBlock? currentBlock = null, bool isJumpTarget = false, bool isFallThrough = false) { // Check if a block already exists at this address var existingBlock = blocks.FirstOrDefault(b => b.Address == address); if (existingBlock != null) { // If the current block is not null, update the relationships if (currentBlock != null) { // Add the existing block as a successor to the current block if not already present if (!currentBlock.Successors.Contains(existingBlock)) { currentBlock.Successors.Add(existingBlock); } // Add the current block as a predecessor to the existing block if not already present if (!existingBlock.Predecessors.Contains(currentBlock)) { existingBlock.Predecessors.Add(currentBlock); } } return existingBlock; } // Create a new block with the provided address and instructions var block = new InstructionBlock() { Address = address, Instructions = instructions }; // Add the block to the collection blocks.Add(block); // If the current block is not null, update the relationships if (currentBlock != null) { // Add the new block as a successor to the current block currentBlock.Successors.Add(block); // Add the current block as a predecessor to the new block block.Predecessors.Add(currentBlock); } // Log the created block for debugging Console.WriteLine($"Created block:\n{block}"); return block; } } /// /// Represents a basic block of instructions with a single entry and exit point /// public class InstructionBlock { /// /// The starting address of the block /// public ulong Address { get; set; } /// /// The list of instructions contained in this block /// public List Instructions { get; set; } = []; /// /// The blocks that can transfer control to this block /// public List Predecessors { get; set; } = []; /// /// The blocks that this block can transfer control to /// public List Successors { get; set; } = []; /// /// Returns a string representation of the block, including its address, instructions, and control flow information /// public override string ToString() { // Create a string for predecessors string predecessorsStr = Predecessors.Count > 0 ? $"Predecessors: {string.Join(", ", Predecessors.Select(p => $"0x{p.Address:X8}"))}" : "No predecessors"; // Create a string for successors string successorsStr = Successors.Count > 0 ? $"Successors: {string.Join(", ", Successors.Select(s => $"0x{s.Address:X8}"))}" : "No successors"; // Return the complete string representation return $"Address: 0x{Address:X8}\n{predecessorsStr}\n{successorsStr}\n{string.Join("\n", Instructions)}"; } }