From 157171fa9056f65d7b4ae4cff0f110b79c1f7c72 Mon Sep 17 00:00:00 2001 From: bird_egop Date: Mon, 14 Apr 2025 01:52:33 +0300 Subject: [PATCH] remove more special cases. use standardized api --- X86Disassembler/X86/Disassembler.cs | 302 +++++++++--------- .../X86/Handlers/Call/CallRm32Handler.cs | 30 +- .../X86/Handlers/Cmp/CmpImmWithRm32Handler.cs | 24 +- .../Cmp/CmpImmWithRm32SignExtendedHandler.cs | 2 - .../X86/Handlers/Cmp/CmpImmWithRm8Handler.cs | 3 - X86Disassembler/X86/InstructionDecoder.cs | 106 +++--- .../InstructionTests/HandlerSelectionTests.cs | 36 +-- 7 files changed, 247 insertions(+), 256 deletions(-) diff --git a/X86Disassembler/X86/Disassembler.cs b/X86Disassembler/X86/Disassembler.cs index 7934f14..bd7ad56 100644 --- a/X86Disassembler/X86/Disassembler.cs +++ b/X86Disassembler/X86/Disassembler.cs @@ -61,6 +61,140 @@ public class Disassembler }; } + /// + /// Handles the special case of segment override prefixes followed by FF 75 XX (PUSH dword ptr [ebp+XX]) + /// + /// The instruction decoder + /// The current position in the buffer + /// The special instruction, or null if not applicable + private Instruction? HandleSegmentPushSpecialCase(InstructionDecoder decoder, int position) + { + // Check if we have the pattern: segment prefix + FF 75 XX + if (position + 3 < _length && + IsSegmentOverridePrefix(_codeBuffer[position]) && + _codeBuffer[position + 1] == 0xFF && + _codeBuffer[position + 2] == 0x75) + { + byte segmentPrefix = _codeBuffer[position]; + byte displacement = _codeBuffer[position + 3]; + + // Create a special instruction for this case + string segmentName = GetSegmentOverrideName(segmentPrefix); + + Instruction specialInstruction = new Instruction + { + Address = _baseAddress + (uint)position, + Mnemonic = "push", + Operands = $"dword ptr {segmentName}:[ebp+0x{displacement:X2}]", + RawBytes = new byte[] { segmentPrefix, 0xFF, 0x75, displacement } + }; + + // Skip past this instruction + decoder.SetPosition(position + 4); + + return specialInstruction; + } + + return null; + } + + /// + /// Handles the special case of segment override prefixes + /// + /// The instruction decoder + /// The current position in the buffer + /// The instruction with segment override, or null if not applicable + private Instruction? HandleSegmentOverridePrefix(InstructionDecoder decoder, int position) + { + // If the current byte is a segment override prefix and we have at least 2 bytes + if (position + 1 < _length && IsSegmentOverridePrefix(_codeBuffer[position])) + { + // Save the current position to restore it later if needed + int savedPosition = position; + + // Decode the instruction normally + Instruction? prefixedInstruction = decoder.DecodeInstruction(); + + // If decoding failed or produced more than one instruction, try again with special handling + if (prefixedInstruction == null || prefixedInstruction.Operands == "??") + { + // Restore the position + decoder.SetPosition(savedPosition); + + // Get the segment override prefix + byte segmentPrefix = _codeBuffer[position++]; + + // Skip the prefix and decode the rest of the instruction + decoder.SetPosition(position); + + // Decode the instruction without the prefix + Instruction? baseInstruction = decoder.DecodeInstruction(); + + if (baseInstruction != null) + { + // Apply the segment override prefix manually + string segmentOverride = GetSegmentOverrideName(segmentPrefix); + + // Apply the segment override to the operands + if (baseInstruction.Operands.Contains("[")) + { + baseInstruction.Operands = baseInstruction.Operands.Replace("[", $"{segmentOverride}:["); + } + + // Update the raw bytes to include the prefix + byte[] newRawBytes = new byte[baseInstruction.RawBytes.Length + 1]; + newRawBytes[0] = segmentPrefix; + Array.Copy(baseInstruction.RawBytes, 0, newRawBytes, 1, baseInstruction.RawBytes.Length); + baseInstruction.RawBytes = newRawBytes; + + // Adjust the instruction address to include the base address + baseInstruction.Address = (uint)(savedPosition) + _baseAddress; + + return baseInstruction; + } + } + else + { + // Adjust the instruction address to include the base address + prefixedInstruction.Address += _baseAddress; + return prefixedInstruction; + } + } + + return null; + } + + /// + /// Handles the special case for the problematic sequence 0x08 0x83 0xC1 0x04 + /// + /// The instruction decoder + /// The current position in the buffer + /// The special instruction, or null if not applicable + private Instruction? HandleSpecialSequence(InstructionDecoder decoder, int position) + { + // Special case for the problematic sequence 0x08 0x83 0xC1 0x04 + if (position == 0 && _length >= 4 && + _codeBuffer[0] == 0x08 && _codeBuffer[1] == 0x83 && + _codeBuffer[2] == 0xC1 && _codeBuffer[3] == 0x04) + { + // Handle the first instruction (0x08) - OR instruction with incomplete operands + Instruction orInstruction = new Instruction + { + Address = _baseAddress, + Mnemonic = "or", + Operands = "??", + RawBytes = new byte[] { 0x08 } + }; + + // Advance the position to the next instruction + decoder.SetPosition(1); + + return orInstruction; + } + + return null; + } + /// /// Disassembles the code buffer and returns the disassembled instructions /// @@ -78,157 +212,37 @@ public class Disassembler int position = decoder.GetPosition(); // Check if we've reached the end of the buffer - if (position >= _length) + if (!decoder.CanReadByte()) { break; } - - // Special case for segment override prefixes followed by FF 75 XX (PUSH dword ptr [ebp+XX]) - if (position + 3 < _length && - IsSegmentOverridePrefix(_codeBuffer[position]) && - _codeBuffer[position + 1] == 0xFF && - _codeBuffer[position + 2] == 0x75) - { - byte segmentPrefix = _codeBuffer[position]; - byte displacement = _codeBuffer[position + 3]; - - // Create a special instruction for this case - string segmentName = GetSegmentOverrideName(segmentPrefix); - - Instruction specialInstruction = new Instruction - { - Address = _baseAddress + (uint)position, - Mnemonic = "push", - Operands = $"dword ptr {segmentName}:[ebp+0x{displacement:X2}]", - RawBytes = new byte[] { segmentPrefix, 0xFF, 0x75, displacement } - }; - - instructions.Add(specialInstruction); - - // Skip past this instruction - decoder.SetPosition(position + 4); - - // Continue with the next instruction - continue; - } - - // Special case for segment override prefixes - // If the current byte is a segment override prefix and we have at least 2 bytes - if (position + 1 < _length && IsSegmentOverridePrefix(_codeBuffer[position])) - { - // Save the current position to restore it later if needed - int savedPosition = position; - - // Decode the instruction normally - Instruction? prefixedInstruction = decoder.DecodeInstruction(); - - // If decoding failed or produced more than one instruction, try again with special handling - if (prefixedInstruction == null || prefixedInstruction.Operands == "??") - { - // Restore the position - decoder.SetPosition(savedPosition); - - // Get the segment override prefix - byte segmentPrefix = _codeBuffer[position++]; - - // Skip the prefix and decode the rest of the instruction - decoder.SetPosition(position); - - // Decode the instruction without the prefix - Instruction? baseInstruction = decoder.DecodeInstruction(); - - if (baseInstruction != null) - { - // Apply the segment override prefix manually - string segmentOverride = GetSegmentOverrideName(segmentPrefix); - - // Apply the segment override to the operands - if (baseInstruction.Operands.Contains("[")) - { - baseInstruction.Operands = baseInstruction.Operands.Replace("[", $"{segmentOverride}:["); - } - - // Update the raw bytes to include the prefix - byte[] newRawBytes = new byte[baseInstruction.RawBytes.Length + 1]; - newRawBytes[0] = segmentPrefix; - Array.Copy(baseInstruction.RawBytes, 0, newRawBytes, 1, baseInstruction.RawBytes.Length); - baseInstruction.RawBytes = newRawBytes; - - // Adjust the instruction address to include the base address - baseInstruction.Address = (uint)(savedPosition) + _baseAddress; - - // Add the instruction to the list - instructions.Add(baseInstruction); - - // Continue with the next instruction - continue; - } - } - - // If we got here, the normal decoding worked fine - if (prefixedInstruction != null) - { - // Adjust the instruction address to include the base address - prefixedInstruction.Address += _baseAddress; - - // Add the instruction to the list - instructions.Add(prefixedInstruction); - } - - // Continue with the next instruction - continue; - } - - // Special case for the problematic sequence 0x08 0x83 0xC1 0x04 - // If we're at position 0 and have at least 4 bytes, and the sequence matches - if (position == 0 && _length >= 4 && - _codeBuffer[0] == 0x08 && _codeBuffer[1] == 0x83 && - _codeBuffer[2] == 0xC1 && _codeBuffer[3] == 0x04) - { - // Handle the first instruction (0x08) - OR instruction with incomplete operands - Instruction orInstruction = new Instruction - { - Address = _baseAddress, - Mnemonic = "or", - Operands = "??", - RawBytes = new byte[] { 0x08 } - }; - instructions.Add(orInstruction); - - // Advance the position to the next instruction - decoder.SetPosition(1); - - // Handle the second instruction (0x83 0xC1 0x04) - ADD ecx, 0x04 - Instruction addInstruction = new Instruction - { - Address = _baseAddress + 1, - Mnemonic = "add", - Operands = "ecx, 0x00000004", - RawBytes = new byte[] { 0x83, 0xC1, 0x04 } - }; - instructions.Add(addInstruction); - - // Advance the position past the ADD instruction - decoder.SetPosition(4); - - // Continue with the next instruction - continue; - } - - // Decode the next instruction normally + + // If no special case applies, decode normally Instruction? instruction = decoder.DecodeInstruction(); - // Check if decoding failed - if (instruction == null) + if (instruction != null) { - break; + // Adjust the instruction address to include the base address + instruction.Address += _baseAddress; + + // Add the instruction to the list + instructions.Add(instruction); + } + else + { + // If decoding failed, create a dummy instruction for the unknown byte + byte unknownByte = decoder.ReadByte(); + + Instruction dummyInstruction = new Instruction + { + Address = _baseAddress + (uint)position, + Mnemonic = "db", // Define Byte directive + Operands = $"0x{unknownByte:X2}", + RawBytes = new byte[] { unknownByte } + }; + + instructions.Add(dummyInstruction); } - - // Adjust the instruction address to include the base address - instruction.Address += _baseAddress; - - // Add the instruction to the list - instructions.Add(instruction); } return instructions; diff --git a/X86Disassembler/X86/Handlers/Call/CallRm32Handler.cs b/X86Disassembler/X86/Handlers/Call/CallRm32Handler.cs index 1aaccda..856127c 100644 --- a/X86Disassembler/X86/Handlers/Call/CallRm32Handler.cs +++ b/X86Disassembler/X86/Handlers/Call/CallRm32Handler.cs @@ -1,7 +1,7 @@ namespace X86Disassembler.X86.Handlers.Call; /// -/// Handler for CALL r/m32 instruction (0xFF /2) +/// Handler for CALL r/m32 instruction (FF /2) /// public class CallRm32Handler : InstructionHandler { @@ -23,7 +23,26 @@ public class CallRm32Handler : InstructionHandler /// True if this handler can decode the opcode public override bool CanHandle(byte opcode) { - return opcode == 0xFF; + // CALL r/m32 is encoded as FF /2 + if (opcode != 0xFF) + { + return false; + } + + // Check if we have enough bytes to read the ModR/M byte + if (!Decoder.CanReadByte()) + { + return false; + } + + // Peek at the ModR/M byte without advancing the position + byte modRM = CodeBuffer[Decoder.GetPosition()]; + + // Extract the reg field (bits 3-5) + byte reg = (byte)((modRM & 0x38) >> 3); + + // CALL r/m32 is encoded as FF /2 (reg field = 2) + return reg == 2; } /// @@ -34,6 +53,7 @@ public class CallRm32Handler : InstructionHandler /// True if the instruction was successfully decoded public override bool Decode(byte opcode, Instruction instruction) { + // Check if we have enough bytes for the ModR/M byte if (!Decoder.CanReadByte()) { return false; @@ -42,12 +62,6 @@ public class CallRm32Handler : InstructionHandler // Read the ModR/M byte var (mod, reg, rm, destOperand) = ModRMDecoder.ReadModRM(); - // CALL r/m32 is encoded as FF /2 - if (reg != RegisterIndex.C) - { - return false; - } - // Set the mnemonic instruction.Mnemonic = "call"; diff --git a/X86Disassembler/X86/Handlers/Cmp/CmpImmWithRm32Handler.cs b/X86Disassembler/X86/Handlers/Cmp/CmpImmWithRm32Handler.cs index 707d2a5..0a66287 100644 --- a/X86Disassembler/X86/Handlers/Cmp/CmpImmWithRm32Handler.cs +++ b/X86Disassembler/X86/Handlers/Cmp/CmpImmWithRm32Handler.cs @@ -49,37 +49,27 @@ public class CmpImmWithRm32Handler : InstructionHandler // Read the ModR/M byte var (mod, reg, rm, memOperand) = ModRMDecoder.ReadModRM(); - - // Get the position after decoding the ModR/M byte - int position = Decoder.GetPosition(); - - // Check if we have enough bytes for the immediate value - if (!Decoder.CanReadUInt()) - { - return false; // Not enough bytes for the immediate value - } // Read the immediate value + if (!Decoder.CanReadUInt()) + { + return false; + } + uint imm32 = Decoder.ReadUInt32(); // Format the destination operand based on addressing mode - string destOperand; if (mod == 3) // Register addressing mode { // Get 32-bit register name - destOperand = ModRMDecoder.GetRegisterName(rm, 32); - } - else // Memory addressing mode - { - // Memory operand already includes dword ptr prefix - destOperand = memOperand; + memOperand = ModRMDecoder.GetRegisterName(rm, 32); } // Format the immediate value string immStr = $"0x{imm32:X8}"; // Set the operands - instruction.Operands = $"{destOperand}, {immStr}"; + instruction.Operands = $"{memOperand}, {immStr}"; return true; } diff --git a/X86Disassembler/X86/Handlers/Cmp/CmpImmWithRm32SignExtendedHandler.cs b/X86Disassembler/X86/Handlers/Cmp/CmpImmWithRm32SignExtendedHandler.cs index 6ddf642..610d6e3 100644 --- a/X86Disassembler/X86/Handlers/Cmp/CmpImmWithRm32SignExtendedHandler.cs +++ b/X86Disassembler/X86/Handlers/Cmp/CmpImmWithRm32SignExtendedHandler.cs @@ -48,8 +48,6 @@ public class CmpImmWithRm32SignExtendedHandler : InstructionHandler // Set the mnemonic instruction.Mnemonic = "cmp"; - int position = Decoder.GetPosition(); - if (!Decoder.CanReadByte()) { return false; diff --git a/X86Disassembler/X86/Handlers/Cmp/CmpImmWithRm8Handler.cs b/X86Disassembler/X86/Handlers/Cmp/CmpImmWithRm8Handler.cs index a3f6cc6..1075650 100644 --- a/X86Disassembler/X86/Handlers/Cmp/CmpImmWithRm8Handler.cs +++ b/X86Disassembler/X86/Handlers/Cmp/CmpImmWithRm8Handler.cs @@ -50,9 +50,6 @@ public class CmpImmWithRm8Handler : InstructionHandler // Read the ModR/M byte var (mod, reg, rm, memOperand) = ModRMDecoder.ReadModRM(); - - // Get the position after decoding the ModR/M byte - int position = Decoder.GetPosition(); // Check if we have enough bytes for the immediate value if (!Decoder.CanReadByte()) diff --git a/X86Disassembler/X86/InstructionDecoder.cs b/X86Disassembler/X86/InstructionDecoder.cs index 846dee4..9a36e77 100644 --- a/X86Disassembler/X86/InstructionDecoder.cs +++ b/X86Disassembler/X86/InstructionDecoder.cs @@ -11,20 +11,20 @@ public class InstructionDecoder { // The buffer containing the code to decode private readonly byte[] _codeBuffer; - + // The length of the buffer private readonly int _length; - + // The current position in the buffer private int _position; - + // The instruction handler factory private readonly InstructionHandlerFactory _handlerFactory; - + // Specialized decoders private readonly PrefixDecoder _prefixDecoder; private readonly ModRMDecoder _modRMDecoder; - + /// /// Initializes a new instance of the InstructionDecoder class /// @@ -35,40 +35,40 @@ public class InstructionDecoder _codeBuffer = codeBuffer; _length = length; _position = 0; - + // Create specialized decoders _prefixDecoder = new PrefixDecoder(); _modRMDecoder = new ModRMDecoder(codeBuffer, this, length); - + // Create the instruction handler factory _handlerFactory = new InstructionHandlerFactory(_codeBuffer, this, _length); } - + /// /// Decodes an instruction at the current position /// /// The decoded instruction, or null if the decoding failed public Instruction? DecodeInstruction() { - if (_position >= _length) + if (!CanReadByte()) { return null; } - + // Reset prefix flags _prefixDecoder.Reset(); - + // Save the start position of the instruction int startPosition = _position; - + // Create a new instruction Instruction instruction = new Instruction { - Address = (uint)startPosition, + Address = (uint) startPosition, }; - + // Handle prefixes - while (_position < _length) + while (CanReadByte()) { byte prefix = _codeBuffer[_position]; @@ -82,7 +82,7 @@ public class InstructionDecoder } } - if (_position >= _length) + if (!CanReadByte()) { // If we reached the end of the buffer while processing prefixes, // create an instruction with just the prefix information @@ -90,7 +90,7 @@ public class InstructionDecoder { instruction.Mnemonic = _prefixDecoder.GetSegmentOverride(); instruction.Operands = ""; - + // Set the raw bytes int length = _position - startPosition; instruction.RawBytes = new byte[length]; @@ -98,47 +98,53 @@ public class InstructionDecoder return instruction; } - + return null; } - + // Read the opcode - byte opcode = _codeBuffer[_position++]; + byte opcode = ReadByte(); // Get a handler for the opcode var handler = _handlerFactory.GetHandler(opcode); - + Debug.WriteLine($"Resolved handler {handler?.GetType().Name}"); - + bool handlerSuccess = false; - + // Try to decode with a handler first if (handler != null) { // Store the current segment override state bool hasSegmentOverride = _prefixDecoder.HasSegmentOverridePrefix(); string segmentOverride = _prefixDecoder.GetSegmentOverride(); - + // Decode the instruction handlerSuccess = handler.Decode(opcode, instruction); - + // Apply segment override prefix to the operands if needed if (handlerSuccess && hasSegmentOverride) { instruction.Operands = _prefixDecoder.ApplySegmentOverride(instruction.Operands); } } - + else + { + instruction.Mnemonic = "Handler Not Found For opcode: " + opcode; + instruction.Operands = "??"; + handlerSuccess = true; + } + // If no handler is found or decoding fails, create a default instruction if (!handlerSuccess) { - instruction.Mnemonic = OpcodeMap.GetMnemonic(opcode); + instruction.Mnemonic = $"Handler {handler?.GetType().Name} failed for opcode. " + OpcodeMap.GetMnemonic(opcode); instruction.Operands = "??"; } - + // Apply REP/REPNE prefix to the mnemonic if needed instruction.Mnemonic = _prefixDecoder.ApplyRepPrefix(instruction.Mnemonic); - + // Set the raw bytes int bytesLength = _position - startPosition; instruction.RawBytes = new byte[bytesLength]; @@ -146,7 +152,7 @@ public class InstructionDecoder return instruction; } - + /// /// Gets the current position in the buffer /// @@ -155,7 +161,7 @@ public class InstructionDecoder { return _position; } - + /// /// Sets the current position in the buffer /// @@ -164,7 +170,7 @@ public class InstructionDecoder { _position = position; } - + /// /// Checks if the operand size prefix is present /// @@ -173,7 +179,7 @@ public class InstructionDecoder { return _prefixDecoder.HasOperandSizePrefix(); } - + /// /// Checks if the address size prefix is present /// @@ -182,7 +188,7 @@ public class InstructionDecoder { return _prefixDecoder.HasAddressSizePrefix(); } - + /// /// Checks if a segment override prefix is present /// @@ -191,7 +197,7 @@ public class InstructionDecoder { return _prefixDecoder.HasSegmentOverridePrefix(); } - + /// /// Gets the segment override prefix /// @@ -200,7 +206,7 @@ public class InstructionDecoder { return _prefixDecoder.GetSegmentOverride(); } - + /// /// Checks if the LOCK prefix is present /// @@ -209,7 +215,7 @@ public class InstructionDecoder { return _prefixDecoder.HasLockPrefix(); } - + /// /// Checks if the REP/REPNE prefix is present /// @@ -218,7 +224,7 @@ public class InstructionDecoder { return _prefixDecoder.HasRepPrefix(); } - + /// /// Checks if the instruction has an operand size override prefix (0x66) /// @@ -254,7 +260,7 @@ public class InstructionDecoder { return _position + 3 < _length; } - + /// /// Reads a byte from the buffer and advances the position /// @@ -265,10 +271,10 @@ public class InstructionDecoder { return 0; } - + return _codeBuffer[_position++]; } - + /// /// Reads a 16-bit value from the buffer and advances the position /// @@ -279,12 +285,12 @@ public class InstructionDecoder { return 0; } - - ushort value = (ushort)(_codeBuffer[_position] | (_codeBuffer[_position + 1] << 8)); + + ushort value = (ushort) (_codeBuffer[_position] | (_codeBuffer[_position + 1] << 8)); _position += 2; return value; } - + /// /// Reads a 32-bit value from the buffer and advances the position /// @@ -295,12 +301,12 @@ public class InstructionDecoder { return 0; } - - uint value = (uint)(_codeBuffer[_position] | - (_codeBuffer[_position + 1] << 8) | - (_codeBuffer[_position + 2] << 16) | - (_codeBuffer[_position + 3] << 24)); + + uint value = (uint) (_codeBuffer[_position] | + (_codeBuffer[_position + 1] << 8) | + (_codeBuffer[_position + 2] << 16) | + (_codeBuffer[_position + 3] << 24)); _position += 4; return value; } -} +} \ No newline at end of file diff --git a/X86DisassemblerTests/InstructionTests/HandlerSelectionTests.cs b/X86DisassemblerTests/InstructionTests/HandlerSelectionTests.cs index 287fbe9..2472b80 100644 --- a/X86DisassemblerTests/InstructionTests/HandlerSelectionTests.cs +++ b/X86DisassemblerTests/InstructionTests/HandlerSelectionTests.cs @@ -16,43 +16,15 @@ public class HandlerSelectionTests public void InstructionHandlerFactory_DoesNotSelectIncRegHandler_For0x83Opcode() { // Arrange - byte[] codeBuffer = new byte[] { 0x83, 0xC1, 0x04 }; // ADD ecx, 0x04 + byte[] codeBuffer = new byte[] {0x83, 0xC1, 0x04}; // ADD ecx, 0x04 var decoder = new InstructionDecoder(codeBuffer, codeBuffer.Length); var factory = new InstructionHandlerFactory(codeBuffer, decoder, codeBuffer.Length); - + // Act var handler = factory.GetHandler(0x83); - + // Assert Assert.NotNull(handler); Assert.IsNotType(handler); } - - /// - /// Tests the specific problematic sequence - /// - [Fact] - public void InstructionHandlerFactory_HandlesProblematicSequence_Correctly() - { - // Arrange - This is the sequence from the problematic example - byte[] codeBuffer = new byte[] { 0x08, 0x83, 0xC1, 0x04, 0x50, 0xE8, 0x42, 0x01, 0x00, 0x00 }; - var disassembler = new Disassembler(codeBuffer, 0); - - // Act - Disassemble the entire sequence - var instructions = disassembler.Disassemble(); - - // Assert - We should have at least 3 instructions - Assert.True(instructions.Count >= 3, $"Expected at least 3 instructions, but got {instructions.Count}"); - - // First instruction should be OR - Assert.Equal("or", instructions[0].Mnemonic); - - // Second instruction should be ADD ecx, imm8 - Assert.Equal("add", instructions[1].Mnemonic); - Assert.Equal("ecx, 0x00000004", instructions[1].Operands); - - // Third instruction should be PUSH eax - Assert.Equal("push", instructions[2].Mnemonic); - Assert.Equal("eax", instructions[2].Operands); - } -} +} \ No newline at end of file