diff --git a/X86Disassembler/Analysers/AsmFunction.cs b/X86Disassembler/Analysers/AsmFunction.cs new file mode 100644 index 0000000..5179fc6 --- /dev/null +++ b/X86Disassembler/Analysers/AsmFunction.cs @@ -0,0 +1,13 @@ +namespace X86Disassembler.Analysers; + +public class AsmFunction +{ + public ulong Address { get; set; } + + public List Blocks { get; set; } + + public override string ToString() + { + return $"Function at {Address:X8}\n{string.Join("\n", Blocks.Select(x => $"\t{x}"))}"; + } +} \ No newline at end of file diff --git a/X86Disassembler/Analysers/BlockDisassembler.cs b/X86Disassembler/Analysers/BlockDisassembler.cs new file mode 100644 index 0000000..50c2c06 --- /dev/null +++ b/X86Disassembler/Analysers/BlockDisassembler.cs @@ -0,0 +1,197 @@ +using X86Disassembler.X86; + +namespace X86Disassembler.Analysers; + +/// +/// Disassembles code into basic blocks by following control flow instructions. +/// A basic block is a sequence of instructions with a single entry point (the first instruction) +/// and a single exit point (the last instruction, typically a jump or return). +/// +public class BlockDisassembler +{ + // The buffer containing the code to disassemble + private readonly byte[] _codeBuffer; + + // The length of the buffer + private readonly int _length; + + // The base address of the code + private readonly ulong _baseAddress; + + /// + /// Initializes a new instance of the BlockDisassembler class + /// + /// The raw code bytes to be disassembled + /// The base RVA (Relative Virtual Address) of the code section + public BlockDisassembler(byte[] codeBuffer, ulong baseAddress) + { + _codeBuffer = codeBuffer; + _length = codeBuffer.Length; + + _baseAddress = baseAddress; + } + + /// + /// Disassembles code starting from the specified RVA address by following control flow. + /// Creates blocks of instructions separated by jumps, branches, and returns. + /// + /// The RVA (Relative Virtual Address) to start disassembly from + /// A list of instruction blocks representing the control flow of the code + public AsmFunction DisassembleFromAddress(uint rvaAddress) + { + // Create instruction decoder for parsing the code buffer + InstructionDecoder decoder = new InstructionDecoder(_codeBuffer, _length); + + // Track visited addresses to prevent infinite loops + HashSet visitedAddresses = []; + + // Queue of addresses to process (breadth-first approach) + Queue addressQueue = []; + // Calculate the file offset from the RVA by subtracting the base address + addressQueue.Enqueue(rvaAddress - _baseAddress); + + // List to store discovered basic blocks + List blocks = []; + while (addressQueue.Count > 0) + { + // Get the next address to process + var address = addressQueue.Dequeue(); + + // Skip if we've already visited this address + if (!visitedAddresses.Add(address)) + { + Console.WriteLine($"Already visited address {address}"); + continue; + } + + // Position the decoder at the current address + decoder.SetPosition((int) address); + + // Collect instructions for this block + List instructions = []; + + // Process instructions until we hit a control flow change + while (true) + { + // If we've stepped onto an existing block, create a new block up to this point + // and stop processing this path (to avoid duplicating instructions) + if (blocks.Any(x => x.Address == (ulong) decoder.GetPosition())) + { + Console.WriteLine("Stepped on to existing block. Creating in the middle"); + RegisterBlock(blocks, address, instructions); + break; + } + + // Decode the next instruction + var instruction = decoder.DecodeInstruction(); + + // Handle decoding failures + if (instruction is null) + { + throw new InvalidOperationException($"Unexpectedly failed to decode instruction at {address}"); + } + + // Add the instruction to the current block + instructions.Add(instruction); + + // Check for conditional jump (e.g., JZ, JNZ, JLE) + // For conditional jumps, we need to follow both the jump target and the fall-through path + if (instruction.Type.IsConditionalJump()) + { + // Register this block (it ends with a conditional jump) + RegisterBlock(blocks, address, instructions); + + // Queue the jump target address for processing + addressQueue.Enqueue( + instruction.StructuredOperands[0] + .GetValue() + ); + + // Queue the fall-through address (next instruction after this jump) + addressQueue.Enqueue((uint) decoder.GetPosition()); + break; + } + + // Check for unconditional jump (e.g., JMP) + // For unconditional jumps, we only follow the jump target + if (instruction.Type.IsRegularJump()) + { + // Register this block (it ends with an unconditional jump) + RegisterBlock(blocks, address, instructions); + + // Queue the jump target address for processing + addressQueue.Enqueue( + instruction.StructuredOperands[0] + .GetValue() + ); + break; + } + + // Check for return instruction (e.g., RET, RETF) + // Returns end a block without any successors + if (instruction.Type.IsRet()) + { + // Register this block (it ends with a return) + RegisterBlock(blocks, address, instructions); + break; + } + } + } + + // Since blocks aren't necessarily ordered (ASM can jump anywhere it likes) + // we need to sort the blocks ourselves + blocks.Sort((b1, b2) => b1.Address.CompareTo(b2.Address)); + + return new AsmFunction() + { + Address = rvaAddress, + Blocks = blocks, + }; + } + + /// + /// Creates and registers a new instruction block in the blocks collection + /// + /// The list of blocks to add to + /// The starting address of the block + /// The instructions contained in the block + public void RegisterBlock(List blocks, ulong address, List instructions) + { + // Create a new block with the provided address and instructions + var block = new InstructionBlock() + { + Address = address, + Instructions = instructions + }; + + // Add the block to the collection + blocks.Add(block); + + // Log the created block for debugging + Console.WriteLine($"Created block:\n{block}"); + } +} + +/// +/// Represents a basic block of instructions with a single entry and exit point +/// +public class InstructionBlock +{ + /// + /// The starting address of the block + /// + public ulong Address { get; set; } + + /// + /// The list of instructions contained in this block + /// + public List Instructions { get; set; } + + /// + /// Returns a string representation of the block, including its address and instructions + /// + public override string ToString() + { + return $"Address: {Address:X8}\n{string.Join("\n", Instructions)}"; + } +} \ No newline at end of file diff --git a/X86Disassembler/Analysers/InstructionTypeExtensions.cs b/X86Disassembler/Analysers/InstructionTypeExtensions.cs new file mode 100644 index 0000000..38f6a3d --- /dev/null +++ b/X86Disassembler/Analysers/InstructionTypeExtensions.cs @@ -0,0 +1,40 @@ +using X86Disassembler.X86; + +namespace X86Disassembler.Analysers; + +public static class InstructionTypeExtensions +{ + public static bool IsConditionalJump(this InstructionType type) + { + return type switch + { + InstructionType.Jg => true, + InstructionType.Jge => true, + InstructionType.Jl => true, + InstructionType.Jle => true, + InstructionType.Ja => true, + InstructionType.Jae => true, + InstructionType.Jb => true, + InstructionType.Jbe => true, + InstructionType.Jz => true, + InstructionType.Jnz => true, + InstructionType.Jo => true, + InstructionType.Jno => true, + InstructionType.Js => true, + InstructionType.Jns => true, + InstructionType.Jp => true, + InstructionType.Jnp => true, + _ => false + }; + } + + public static bool IsRegularJump(this InstructionType type) + { + return type == InstructionType.Jmp; + } + + public static bool IsRet(this InstructionType type) + { + return type is InstructionType.Ret or InstructionType.Retf; + } +} \ No newline at end of file diff --git a/X86Disassembler/Analysers/OperandExtensions.cs b/X86Disassembler/Analysers/OperandExtensions.cs new file mode 100644 index 0000000..3777ec4 --- /dev/null +++ b/X86Disassembler/Analysers/OperandExtensions.cs @@ -0,0 +1,16 @@ +using X86Disassembler.X86; +using X86Disassembler.X86.Operands; + +namespace X86Disassembler.Analysers; + +public static class OperandExtensions +{ + public static uint GetValue(this Operand operand) + { + return operand switch + { + RelativeOffsetOperand roo => roo.TargetAddress, + _ => 0 + }; + } +} \ No newline at end of file diff --git a/X86Disassembler/Program.cs b/X86Disassembler/Program.cs index 8b534e6..29a3ab2 100644 --- a/X86Disassembler/Program.cs +++ b/X86Disassembler/Program.cs @@ -1,3 +1,4 @@ +using X86Disassembler.Analysers; using X86Disassembler.PE; using X86Disassembler.X86; @@ -63,102 +64,37 @@ public class Program var section = codeSections[0]; byte[] codeBytes = peFile.GetSectionData(peFile.SectionHeaders.IndexOf(section)); - // First demonstrate sequential disassembly - Console.WriteLine($"Sequential disassembly of section {section.Name} at RVA 0x{section.VirtualAddress:X8}:"); + // // First demonstrate sequential disassembly + // Console.WriteLine($"Sequential disassembly of section {section.Name} at RVA 0x{section.VirtualAddress:X8}:"); + // + // // Create a disassembler for the code section + // // Base address should be the section's virtual address, not the image base + VA + // Disassembler disassembler = new Disassembler(codeBytes, section.VirtualAddress); + // + // // Disassemble sequentially (linear approach) + // var linearInstructions = disassembler.Disassemble(); + // + // // Print the first 30 instructions from linear disassembly + // int linearCount = Math.Min(30, linearInstructions.Count); + // for (int i = 0; i < linearCount; i++) + // { + // Console.WriteLine(linearInstructions[i]); + // } + // + // // Print a summary of how many more instructions there are + // if (linearInstructions.Count > linearCount) + // { + // Console.WriteLine($"... ({linearInstructions.Count - linearCount} more instructions not shown)"); + // } + - // Create a disassembler for the code section - // Base address should be the section's virtual address, not the image base + VA - Disassembler disassembler = new Disassembler(codeBytes, section.VirtualAddress); + // disassemble entry point + var disassembler = new BlockDisassembler(codeBytes, section.VirtualAddress); - // Disassemble sequentially (linear approach) - var linearInstructions = disassembler.Disassemble(); - - // Print the first 30 instructions from linear disassembly - int linearCount = Math.Min(30, linearInstructions.Count); - for (int i = 0; i < linearCount; i++) - { - Console.WriteLine(linearInstructions[i]); - } - - // Print a summary of how many more instructions there are - if (linearInstructions.Count > linearCount) - { - Console.WriteLine($"... ({linearInstructions.Count - linearCount} more instructions not shown)"); - } - - Console.WriteLine(); - Console.WriteLine("===================================================="); - Console.WriteLine(); - - // Now demonstrate control flow-based disassembly from entry point - Console.WriteLine($"Control flow-based disassembly starting from entry point 0x{peFile.OptionalHeader.AddressOfEntryPoint:X8}:"); - - try - { - // Get the entry point RVA from the PE header - uint entryPointRva = peFile.OptionalHeader.AddressOfEntryPoint; - - // Make sure the entry point is within this code section - if (entryPointRva >= section.VirtualAddress && - entryPointRva < section.VirtualAddress + section.VirtualSize) - { - // Disassemble starting from the entry point (control flow-based) - var cfgInstructions = disassembler.DisassembleFunction(entryPointRva); - - // Print the instructions from the entry point function - int cfgCount = Math.Min(50, cfgInstructions.Count); - for (int i = 0; i < cfgCount; i++) - { - Console.WriteLine(cfgInstructions[i]); - } - - // Print a summary if there are more instructions - if (cfgInstructions.Count > cfgCount) - { - Console.WriteLine($"... ({cfgInstructions.Count - cfgCount} more instructions in this function not shown)"); - } - - Console.WriteLine(); - Console.WriteLine($"Found {cfgInstructions.Count} instructions following control flow from entry point."); - } - else - { - // Try one of the exported functions instead - Console.WriteLine($"Entry point is not in the {section.Name} section. Trying the first exported function instead..."); - - if (peFile.ExportDirectory != null && peFile.ExportedFunctions.Count > 0) - { - uint functionRva = peFile.ExportedFunctions[0].AddressRva; - Console.WriteLine($"Disassembling exported function at RVA 0x{functionRva:X8} ({peFile.ExportedFunctions[0].Name}):"); - - var cfgInstructions = disassembler.DisassembleFunction(functionRva); - - // Print the instructions from the function - int cfgCount = Math.Min(50, cfgInstructions.Count); - for (int i = 0; i < cfgCount; i++) - { - Console.WriteLine(cfgInstructions[i]); - } - - // Print a summary if there are more instructions - if (cfgInstructions.Count > cfgCount) - { - Console.WriteLine($"... ({cfgInstructions.Count - cfgCount} more instructions in this function not shown)"); - } - - Console.WriteLine(); - Console.WriteLine($"Found {cfgInstructions.Count} instructions following control flow from exported function."); - } - else - { - Console.WriteLine("No exported functions found to disassemble."); - } - } - } - catch (Exception ex) - { - Console.WriteLine($"Error during control flow disassembly: {ex.Message}"); - } + var asmFunction = disassembler.DisassembleFromAddress(peFile.OptionalHeader.AddressOfEntryPoint); + + Console.WriteLine(asmFunction); + _ = 5; } // Console.WriteLine("\nPress Enter to exit..."); diff --git a/X86Disassembler/X86/Disassembler.cs b/X86Disassembler/X86/Disassembler.cs index 301bb90..75b9994 100644 --- a/X86Disassembler/X86/Disassembler.cs +++ b/X86Disassembler/X86/Disassembler.cs @@ -2,7 +2,6 @@ using X86Disassembler.X86.Operands; namespace X86Disassembler.X86; -using System.Text; using System.Collections.Generic; /// @@ -19,9 +18,6 @@ public class Disassembler // The base address of the code private readonly ulong _baseAddress; - // Segment override prefixes - private static readonly byte[] SegmentOverridePrefixes = {0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65}; - /// /// Initializes a new instance of the Disassembler class /// @@ -34,35 +30,6 @@ public class Disassembler _baseAddress = baseAddress; } - /// - /// Checks if a byte is a segment override prefix - /// - /// The byte to check - /// True if the byte is a segment override prefix - private bool IsSegmentOverridePrefix(byte b) - { - return Array.IndexOf(SegmentOverridePrefixes, b) >= 0; - } - - /// - /// Gets the segment override name for a prefix byte - /// - /// The prefix byte - /// The segment override name - private string GetSegmentOverrideName(byte prefix) - { - return prefix switch - { - 0x26 => "es", - 0x2E => "cs", - 0x36 => "ss", - 0x3E => "ds", - 0x64 => "fs", - 0x65 => "gs", - _ => string.Empty - }; - } - /// /// Disassembles the code buffer sequentially and returns all disassembled instructions /// @@ -117,196 +84,4 @@ public class Disassembler return instructions; } - - /// - /// Disassembles a function starting from a specific virtual address (RVA) and follows control flow - /// - /// The relative virtual address to start disassembly from - /// A list of disassembled instructions representing the function - public List DisassembleFunction(uint startRva) - { - // The _baseAddress is the section's RVA (stored in Program.cs) - // We need to calculate the offset within the section by subtracting the section's RVA from the start RVA - int startOffset = (int)(startRva - _baseAddress); - - // Validate the offset is within bounds - if (startOffset < 0 || startOffset >= _length) - { - throw new ArgumentOutOfRangeException(nameof(startRva), - $"Start address 0x{startRva:X8} is outside the bounds of the section at RVA 0x{_baseAddress:X8} with size {_length}"); - } - - return DisassembleFromOffset(startOffset); - } - - /// - /// Disassembles instructions starting from a specific offset using control flow analysis - /// - /// The offset in the code buffer to start disassembly from - /// A list of disassembled instructions - private List DisassembleFromOffset(int startOffset) - { - // Keep track of disassembled instructions - List instructions = new List(); - - // Track visited addresses to avoid infinite loops - HashSet visitedOffsets = new HashSet(); - - // Queue of offsets to process - Queue offsetQueue = new Queue(); - offsetQueue.Enqueue(startOffset); - - while (offsetQueue.Count > 0) - { - int currentOffset = offsetQueue.Dequeue(); - - // Skip if we've already processed this offset - if (visitedOffsets.Contains(currentOffset)) - { - continue; - } - - // Create a new decoder positioned at the current offset - InstructionDecoder decoder = new InstructionDecoder(_codeBuffer, _length); - decoder.SetPosition(currentOffset); - - // Process instructions at this address until we hit a control flow change - while (decoder.CanReadByte() && decoder.GetPosition() < _length) - { - int positionBeforeDecode = decoder.GetPosition(); - visitedOffsets.Add(positionBeforeDecode); - - // Decode the instruction - Instruction? instruction = decoder.DecodeInstruction(); - if (instruction == null) - { - // Invalid instruction, skip to next byte - decoder.SetPosition(positionBeforeDecode + 1); - continue; - } - - // Set the instruction address - instruction.Address = _baseAddress + (uint)positionBeforeDecode; - - // Add the instruction to our list - instructions.Add(instruction); - - // Check for control flow instructions - if (IsReturnInstruction(instruction)) - { - // End of function, don't follow any further from this branch - break; - } - else if (IsUnconditionalJump(instruction)) - { - // Follow the unconditional jump target - int? targetOffset = GetJumpTargetOffset(instruction, positionBeforeDecode); - if (targetOffset.HasValue && targetOffset.Value >= 0 && targetOffset.Value < _length) - { - offsetQueue.Enqueue(targetOffset.Value); - } - - // End this branch of execution - break; - } - else if (IsConditionalJump(instruction)) - { - // Follow both paths for conditional jumps (target and fall-through) - int? targetOffset = GetJumpTargetOffset(instruction, positionBeforeDecode); - if (targetOffset.HasValue && targetOffset.Value >= 0 && targetOffset.Value < _length) - { - offsetQueue.Enqueue(targetOffset.Value); - } - - // Continue with fall-through path in this loop - } - else if (IsCallInstruction(instruction)) - { - // For calls, we just continue with the next instruction (we don't follow the call) - // We could add separate functionality to follow calls if needed - } - } - } - - // Sort instructions by address for readability - instructions.Sort((a, b) => a.Address.CompareTo(b.Address)); - - return instructions; - } - - /// - /// Checks if an instruction is a return instruction - /// - private bool IsReturnInstruction(Instruction instruction) - { - return instruction.Type == InstructionType.Ret || - instruction.Type == InstructionType.Retf; - } - - /// - /// Checks if an instruction is an unconditional jump - /// - private bool IsUnconditionalJump(Instruction instruction) - { - return instruction.Type == InstructionType.Jmp; - } - - /// - /// Checks if an instruction is a conditional jump - /// - private bool IsConditionalJump(Instruction instruction) - { - return instruction.Type == InstructionType.Je || - instruction.Type == InstructionType.Jne || - instruction.Type == InstructionType.Ja || - instruction.Type == InstructionType.Jae || - instruction.Type == InstructionType.Jb || - instruction.Type == InstructionType.Jbe || - instruction.Type == InstructionType.Jg || - instruction.Type == InstructionType.Jge || - instruction.Type == InstructionType.Jl || - instruction.Type == InstructionType.Jle || - instruction.Type == InstructionType.Jo || - instruction.Type == InstructionType.Jno || - instruction.Type == InstructionType.Jp || - instruction.Type == InstructionType.Jnp || - instruction.Type == InstructionType.Js || - instruction.Type == InstructionType.Jns || - instruction.Type == InstructionType.Jcxz; - } - - /// - /// Checks if an instruction is a call instruction - /// - private bool IsCallInstruction(Instruction instruction) - { - return instruction.Type == InstructionType.Call; - } - - /// - /// Gets the jump target offset from a jump instruction - /// - private int? GetJumpTargetOffset(Instruction instruction, int instructionOffset) - { - // Check if the instruction has at least one operand - if (instruction.StructuredOperands == null || instruction.StructuredOperands.Count == 0) - { - return null; - } - - // Look for an immediate operand which represents the offset - var operand = instruction.StructuredOperands[0]; - if (operand is ImmediateOperand immediateOperand) - { - // Calculate the target address - // For relative jumps, the target is IP (instruction pointer) + instruction length + offset - int instructionLength = (int)(instruction.Address - _baseAddress) - instructionOffset + 1; - int jumpOffset = Convert.ToInt32(immediateOperand.Value); - - return instructionOffset + instructionLength + jumpOffset; - } - - // For now, we don't handle indirect jumps like JMP [eax] or JMP [ebx+4] - return null; - } } \ No newline at end of file diff --git a/X86Disassembler/X86/InstructionDecoder.cs b/X86Disassembler/X86/InstructionDecoder.cs index 22be9cb..c25ee9b 100644 --- a/X86Disassembler/X86/InstructionDecoder.cs +++ b/X86Disassembler/X86/InstructionDecoder.cs @@ -220,51 +220,6 @@ public class InstructionDecoder return _prefixDecoder.HasOperandSizePrefix(); } - /// - /// Checks if the address size prefix is present - /// - /// True if the address size prefix is present - public bool HasAddressSizePrefix() - { - return _prefixDecoder.HasAddressSizePrefix(); - } - - /// - /// Checks if a segment override prefix is present - /// - /// True if a segment override prefix is present - public bool HasSegmentOverridePrefix() - { - return _prefixDecoder.HasSegmentOverridePrefix(); - } - - /// - /// Gets the segment override prefix - /// - /// The segment override prefix, or an empty string if none is present - public string GetSegmentOverride() - { - return _prefixDecoder.GetSegmentOverride(); - } - - /// - /// Checks if the LOCK prefix is present - /// - /// True if the LOCK prefix is present - public bool HasLockPrefix() - { - return _prefixDecoder.HasLockPrefix(); - } - - /// - /// Checks if the REP/REPNE prefix is present - /// - /// True if the REP/REPNE prefix is present - public bool HasRepPrefix() - { - return _prefixDecoder.HasRepPrefix(); - } - /// /// Checks if the instruction has an operand size override prefix (0x66) /// diff --git a/X86Disassembler/X86/InstructionType.cs b/X86Disassembler/X86/InstructionType.cs index 25999f1..a8d0149 100644 --- a/X86Disassembler/X86/InstructionType.cs +++ b/X86Disassembler/X86/InstructionType.cs @@ -48,8 +48,6 @@ public enum InstructionType // Control flow Jmp, // Jump unconditionally - Je, // Jump if equal - Jne, // Jump if not equal Jg, // Jump if greater Jge, // Jump if greater or equal Jl, // Jump if less