diff --git a/X86Disassembler/X86/Handlers/InstructionHandlerFactory.cs b/X86Disassembler/X86/Handlers/InstructionHandlerFactory.cs index 24f9245..8a30c22 100644 --- a/X86Disassembler/X86/Handlers/InstructionHandlerFactory.cs +++ b/X86Disassembler/X86/Handlers/InstructionHandlerFactory.cs @@ -13,6 +13,7 @@ using X86Disassembler.X86.Handlers.Or; using X86Disassembler.X86.Handlers.Pop; using X86Disassembler.X86.Handlers.Push; using X86Disassembler.X86.Handlers.Ret; +using X86Disassembler.X86.Handlers.String; using X86Disassembler.X86.Handlers.Test; using X86Disassembler.X86.Handlers.Xchg; using X86Disassembler.X86.Handlers.Xor; @@ -41,32 +42,36 @@ public class InstructionHandlerFactory _decoder = decoder; _length = length; - RegisterHandlers(); + RegisterAllHandlers(); } /// /// Registers all handlers /// - private void RegisterHandlers() + private void RegisterAllHandlers() { // Register specific instruction handlers _handlers.Add(new Int3Handler(_codeBuffer, _decoder, _length)); - - RegisterArithmeticUnaryHandlers(); + RegisterArithmeticImmediateHandlers(); - RegisterReturnHandlers(); - RegisterCallHandlers(); - RegisterJumpHandlers(); - RegisterTestHandlers(); + RegisterArithmeticUnaryHandlers(); + RegisterAddHandlers(); + RegisterCmpHandlers(); RegisterXorHandlers(); RegisterOrHandlers(); - RegisterLeaHandlers(); - RegisterCmpHandlers(); + RegisterTestHandlers(); + RegisterDataTransferHandlers(); + RegisterJumpHandlers(); + RegisterCallHandlers(); + RegisterReturnHandlers(); RegisterDecHandlers(); RegisterIncHandlers(); - RegisterAddHandlers(); - RegisterDataTransferHandlers(); + RegisterPushHandlers(); + RegisterPopHandlers(); + RegisterLeaHandlers(); RegisterFloatingPointHandlers(); + RegisterStringHandlers(); + RegisterMovHandlers(); } /// @@ -295,6 +300,51 @@ public class InstructionHandlerFactory _handlers.Add(new LoadStoreInt16Handler(_codeBuffer, _decoder, _length)); } + /// + /// Registers all String instruction handlers + /// + private void 
RegisterStringHandlers() + { + // Add String instruction handlers + _handlers.Add(new RepMovsHandler(_codeBuffer, _decoder, _length)); + } + + /// + /// Registers all MOV instruction handlers + /// + private void RegisterMovHandlers() + { + // Add MOV handlers + _handlers.Add(new MovRegMemHandler(_codeBuffer, _decoder, _length)); + _handlers.Add(new MovMemRegHandler(_codeBuffer, _decoder, _length)); + _handlers.Add(new MovRegImm32Handler(_codeBuffer, _decoder, _length)); + _handlers.Add(new MovRegImm8Handler(_codeBuffer, _decoder, _length)); + _handlers.Add(new MovEaxMoffsHandler(_codeBuffer, _decoder, _length)); + _handlers.Add(new MovMoffsEaxHandler(_codeBuffer, _decoder, _length)); + _handlers.Add(new MovRm32Imm32Handler(_codeBuffer, _decoder, _length)); + _handlers.Add(new MovRm8Imm8Handler(_codeBuffer, _decoder, _length)); + } + + /// + /// Registers all PUSH instruction handlers + /// + private void RegisterPushHandlers() + { + // Add PUSH handlers + _handlers.Add(new PushRegHandler(_codeBuffer, _decoder, _length)); + _handlers.Add(new PushImm32Handler(_codeBuffer, _decoder, _length)); + _handlers.Add(new PushImm8Handler(_codeBuffer, _decoder, _length)); + } + + /// + /// Registers all POP instruction handlers + /// + private void RegisterPopHandlers() + { + // Add POP handlers + _handlers.Add(new PopRegHandler(_codeBuffer, _decoder, _length)); + } + /// /// Gets the handler that can decode the given opcode /// diff --git a/X86Disassembler/X86/InstructionDecoder.cs b/X86Disassembler/X86/InstructionDecoder.cs index a8e7725..5af5ddb 100644 --- a/X86Disassembler/X86/InstructionDecoder.cs +++ b/X86Disassembler/X86/InstructionDecoder.cs @@ -19,13 +19,10 @@ public class InstructionDecoder // The instruction handler factory private readonly InstructionHandlerFactory _handlerFactory; - // Instruction prefixes - private bool _operandSizePrefix; - private bool _addressSizePrefix; - private bool _segmentOverridePrefix; - private bool _lockPrefix; - private bool 
_repPrefix; - private string _segmentOverride; + // Specialized decoders + private readonly PrefixDecoder _prefixDecoder; + private readonly ModRMDecoder _modRMDecoder; + private readonly StringInstructionDecoder _stringDecoder; /// /// Initializes a new instance of the InstructionDecoder class @@ -37,7 +34,11 @@ public class InstructionDecoder _codeBuffer = codeBuffer; _length = length; _position = 0; - _segmentOverride = ""; + + // Create specialized decoders + _prefixDecoder = new PrefixDecoder(); + _modRMDecoder = new ModRMDecoder(codeBuffer, this, length); + _stringDecoder = new StringInstructionDecoder(codeBuffer, length); // Create the instruction handler factory _handlerFactory = new InstructionHandlerFactory(_codeBuffer, this, _length); @@ -55,12 +56,7 @@ public class InstructionDecoder } // Reset prefix flags - _operandSizePrefix = false; - _addressSizePrefix = false; - _segmentOverridePrefix = false; - _lockPrefix = false; - _repPrefix = false; - _segmentOverride = string.Empty; + _prefixDecoder.Reset(); // Save the start position of the instruction int startPosition = _position; @@ -76,54 +72,21 @@ public class InstructionDecoder { byte prefix = _codeBuffer[_position]; - if (prefix == 0x66) // Operand size prefix + if (_prefixDecoder.DecodePrefix(prefix)) { - _operandSizePrefix = true; - _position++; - } - else if (prefix == 0x67) // Address size prefix - { - _addressSizePrefix = true; - _position++; - } - else if ((prefix >= 0x26 && prefix <= 0x3E && (prefix & 0x7) == 0x6) || prefix == 0x64 || prefix == 0x65) // Segment override prefix - { - _segmentOverridePrefix = true; - switch (prefix) - { - case 0x26: _segmentOverride = "es"; break; - case 0x2E: _segmentOverride = "cs"; break; - case 0x36: _segmentOverride = "ss"; break; - case 0x3E: _segmentOverride = "ds"; break; - case 0x64: _segmentOverride = "fs"; break; - case 0x65: _segmentOverride = "gs"; break; - } - _position++; - } - else if (prefix == 0xF0) // LOCK prefix - { - _lockPrefix = true; - 
_position++; - } - else if (prefix == 0xF2 || prefix == 0xF3) // REP/REPNE prefix - { - _repPrefix = true; _position++; - // Special case for string instructions - if (_position < _length) + // Special case for REP/REPNE prefix followed by string instruction + if ((prefix == 0xF2 || prefix == 0xF3) && _position < _length) { - byte stringOp = _codeBuffer[_position]; - if (stringOp == 0xA4 || stringOp == 0xA5 || // MOVS - stringOp == 0xAA || stringOp == 0xAB || // STOS - stringOp == 0xAC || stringOp == 0xAD || // LODS - stringOp == 0xAE || stringOp == 0xAF) // SCAS + byte nextByte = _codeBuffer[_position]; + if (_stringDecoder.IsStringInstruction(nextByte)) { // Skip the string operation opcode _position++; // Handle REP string instruction - return CreateStringInstruction(prefix, stringOp, startPosition); + return _stringDecoder.CreateStringInstruction(prefix, nextByte, startPosition, _position); } } } @@ -137,9 +100,9 @@ public class InstructionDecoder { // If we reached the end of the buffer while processing prefixes, // create an instruction with just the prefix information - if (_segmentOverridePrefix) + if (_prefixDecoder.HasSegmentOverridePrefix()) { - instruction.Mnemonic = _segmentOverride; + instruction.Mnemonic = _prefixDecoder.GetSegmentOverride(); instruction.Operands = ""; // Set the raw bytes @@ -174,22 +137,9 @@ public class InstructionDecoder instruction.Operands = "??"; } - // Add REP prefix to the instruction if present - if (_repPrefix && !instruction.Mnemonic.StartsWith("rep")) - { - instruction.Mnemonic = $"rep {instruction.Mnemonic}"; - } - - // Add segment override prefix to the instruction if present - if (_segmentOverridePrefix && !string.IsNullOrEmpty(instruction.Operands)) - { - // If the instruction has memory operands, add the segment override - if (instruction.Operands.Contains("[")) - { - // Replace the first '[' with the segment override - instruction.Operands = instruction.Operands.Replace("[", $"{_segmentOverride}:[" ); - } - } + // 
Apply prefixes to the instruction + instruction.Mnemonic = _prefixDecoder.ApplyRepPrefix(instruction.Mnemonic); + instruction.Operands = _prefixDecoder.ApplySegmentOverride(instruction.Operands); // Set the raw bytes int bytesLength = _position - startPosition; @@ -199,62 +149,6 @@ public class InstructionDecoder return instruction; } - /// - /// Creates an instruction for a string operation with REP/REPNE prefix - /// - /// The REP/REPNE prefix (0xF2 or 0xF3) - /// The string operation opcode - /// The start position of the instruction - /// The created instruction - private Instruction CreateStringInstruction(byte prefix, byte stringOp, int startPosition) - { - // Create a new instruction - Instruction instruction = new Instruction - { - Address = (uint)startPosition, - }; - - // Get the mnemonic for the string operation - string mnemonic = OpcodeMap.GetMnemonic(stringOp); - instruction.Mnemonic = prefix == 0xF3 ? $"rep {mnemonic}" : $"repne {mnemonic}"; - - // Set operands based on the string operation - switch (stringOp) - { - case 0xA4: // MOVSB - instruction.Operands = "byte ptr [edi], byte ptr [esi]"; - break; - case 0xA5: // MOVSD - instruction.Operands = "dword ptr [edi], dword ptr [esi]"; - break; - case 0xAA: // STOSB - instruction.Operands = "byte ptr [edi], al"; - break; - case 0xAB: // STOSD - instruction.Operands = "dword ptr [edi], eax"; - break; - case 0xAC: // LODSB - instruction.Operands = "al, byte ptr [esi]"; - break; - case 0xAD: // LODSD - instruction.Operands = "eax, dword ptr [esi]"; - break; - case 0xAE: // SCASB - instruction.Operands = "al, byte ptr [edi]"; - break; - case 0xAF: // SCASD - instruction.Operands = "eax, dword ptr [edi]"; - break; - } - - // Set the raw bytes - int length = _position - startPosition; - instruction.RawBytes = new byte[length]; - Array.Copy(_codeBuffer, startPosition, instruction.RawBytes, 0, length); - - return instruction; - } - /// /// Gets the current position in the buffer /// @@ -279,7 +173,7 @@ public 
class InstructionDecoder /// True if the operand size prefix is present public bool HasOperandSizePrefix() { - return _operandSizePrefix; + return _prefixDecoder.HasOperandSizePrefix(); } /// @@ -288,7 +182,7 @@ public class InstructionDecoder /// True if the address size prefix is present public bool HasAddressSizePrefix() { - return _addressSizePrefix; + return _prefixDecoder.HasAddressSizePrefix(); } /// @@ -297,7 +191,7 @@ public class InstructionDecoder /// True if a segment override prefix is present public bool HasSegmentOverridePrefix() { - return _segmentOverridePrefix; + return _prefixDecoder.HasSegmentOverridePrefix(); } /// @@ -306,7 +200,7 @@ public class InstructionDecoder /// The segment override prefix, or an empty string if none is present public string GetSegmentOverride() { - return _segmentOverride; + return _prefixDecoder.GetSegmentOverride(); } /// @@ -315,7 +209,7 @@ public class InstructionDecoder /// True if the LOCK prefix is present public bool HasLockPrefix() { - return _lockPrefix; + return _prefixDecoder.HasLockPrefix(); } /// @@ -324,7 +218,7 @@ public class InstructionDecoder /// True if the REP/REPNE prefix is present public bool HasRepPrefix() { - return _repPrefix; + return _prefixDecoder.HasRepPrefix(); } /// @@ -352,7 +246,7 @@ public class InstructionDecoder return 0; } - ushort value = BitConverter.ToUInt16(_codeBuffer, _position); + ushort value = (ushort)(_codeBuffer[_position] | (_codeBuffer[_position + 1] << 8)); _position += 2; return value; } @@ -368,7 +262,10 @@ public class InstructionDecoder return 0; } - uint value = BitConverter.ToUInt32(_codeBuffer, _position); + uint value = (uint)(_codeBuffer[_position] | + (_codeBuffer[_position + 1] << 8) | + (_codeBuffer[_position + 2] << 16) | + (_codeBuffer[_position + 3] << 24)); _position += 4; return value; } diff --git a/X86Disassembler/X86/PrefixDecoder.cs b/X86Disassembler/X86/PrefixDecoder.cs new file mode 100644 index 0000000..39f7c8b --- /dev/null +++ 
b/X86Disassembler/X86/PrefixDecoder.cs @@ -0,0 +1,170 @@ +namespace X86Disassembler.X86; + +/// <summary> +/// Handles decoding of instruction prefixes and remembers which ones were seen until Reset is called +/// </summary> +public class PrefixDecoder +{ + // Prefix flags, set by DecodePrefix and cleared by Reset + private bool _operandSizePrefix; + private bool _addressSizePrefix; + private bool _segmentOverridePrefix; + private bool _lockPrefix; + private bool _repPrefix; + private string _segmentOverride = string.Empty; + + /// <summary> + /// Initializes a new instance of the PrefixDecoder class with all prefix flags cleared + /// </summary> + public PrefixDecoder() + { + Reset(); + } + + /// <summary> + /// Resets all prefix flags to false and clears the segment override name + /// </summary> + public void Reset() + { + _operandSizePrefix = false; + _addressSizePrefix = false; + _segmentOverridePrefix = false; + _lockPrefix = false; + _repPrefix = false; + _segmentOverride = string.Empty; + } + + /// <summary> + /// Decodes a prefix byte and records the matching prefix flag + /// </summary> + /// <param name="prefix">The prefix byte</param> + /// <returns>True if the byte was a prefix, false otherwise</returns> + public bool DecodePrefix(byte prefix) + { + if (prefix == 0x66) // Operand size prefix + { + _operandSizePrefix = true; + return true; + } + else if (prefix == 0x67) // Address size prefix + { + _addressSizePrefix = true; + return true; + } + else if ((prefix >= 0x26 && prefix <= 0x3E && (prefix & 0x7) == 0x6) || prefix == 0x64 || prefix == 0x65) // Segment override prefix + { + _segmentOverridePrefix = true; + switch (prefix) + { + case 0x26: _segmentOverride = "es"; break; + case 0x2E: _segmentOverride = "cs"; break; + case 0x36: _segmentOverride = "ss"; break; + case 0x3E: _segmentOverride = "ds"; break; + case 0x64: _segmentOverride = "fs"; break; + case 0x65: _segmentOverride = "gs"; break; + } + return true; + } + else if (prefix == 0xF0) // LOCK prefix + { 
+ _lockPrefix = true; + return true; + } + else if (prefix == 0xF2 || prefix == 0xF3) // REP/REPNE prefix (both bytes set the same flag) + { + _repPrefix = true; + return true; + } + + return false; + } + + /// <summary> + /// Checks if the operand size prefix is present + /// </summary> + /// <returns>True if the operand size prefix is present</returns> + public bool HasOperandSizePrefix() + { + 
return _operandSizePrefix; + } + + /// <summary> + /// Checks if the address size prefix is present + /// </summary> + /// <returns>True if the address size prefix is present</returns> + public bool HasAddressSizePrefix() + { + return _addressSizePrefix; + } + + /// <summary> + /// Checks if a segment override prefix is present + /// </summary> + /// <returns>True if a segment override prefix is present</returns> + public bool HasSegmentOverridePrefix() + { + return _segmentOverridePrefix; + } + + /// <summary> + /// Gets the segment register name ("es", "cs", "ss", "ds", "fs" or "gs") recorded for the segment override prefix + /// </summary> + /// <returns>The segment override prefix, or an empty string if none is present</returns> + public string GetSegmentOverride() + { + return _segmentOverride; + } + + /// <summary> + /// Checks if the LOCK prefix is present + /// </summary> + /// <returns>True if the LOCK prefix is present</returns> + public bool HasLockPrefix() + { + return _lockPrefix; + } + + /// <summary> + /// Checks if the REP/REPNE prefix is present + /// </summary> + /// <returns>True if the REP/REPNE prefix is present</returns> + public bool HasRepPrefix() + { + return _repPrefix; + } + + /// <summary> + /// Applies the segment override prefix to the operands string if applicable + /// </summary> + /// <param name="operands">The operands string</param> + /// <returns>The operands string with segment override applied, or the unchanged operands when no override was decoded, the string is null or empty, or it contains no '['</returns> + public string ApplySegmentOverride(string operands) + { + if (_segmentOverridePrefix && !string.IsNullOrEmpty(operands)) + { + // If the instruction has memory operands, add the segment override + if (operands.Contains("[")) + { + // string.Replace rewrites every '[' in operands (not only the first), prefixing each with the segment override + return operands.Replace("[", $"{_segmentOverride}:[" ); + } + } + + return operands; + 
} + + /// <summary> + /// Prepends "rep" to the mnemonic when a REP/REPNE prefix was decoded and the mnemonic does not already start with "rep"; 0xF2 and 0xF3 share one flag, so either prefix is rendered as "rep" + /// </summary> + /// <param name="mnemonic">The mnemonic</param> + /// <returns>The mnemonic with REP prefix applied, or the unchanged mnemonic otherwise</returns> + public string ApplyRepPrefix(string mnemonic) + { + if (_repPrefix && !mnemonic.StartsWith("rep")) + { + return $"rep {mnemonic}"; + } + + return mnemonic; + } +} diff --git a/X86Disassembler/X86/StringInstructionDecoder.cs b/X86Disassembler/X86/StringInstructionDecoder.cs new file mode 100644 index 0000000..7c893d4 --- /dev/null +++ 
b/X86Disassembler/X86/StringInstructionDecoder.cs @@ -0,0 +1,98 @@ +namespace X86Disassembler.X86; + +/// <summary> +/// Handles decoding of REP/REPNE-prefixed string instructions (MOVS, STOS, LODS, SCAS) +/// </summary> +public class StringInstructionDecoder +{ + // The buffer containing the code to decode + private readonly byte[] _codeBuffer; + + // The length of the buffer (assigned in the constructor; not read anywhere else in this class) + private readonly int _length; + + /// <summary> + /// Initializes a new instance of the StringInstructionDecoder class + /// </summary> + /// <param name="codeBuffer">The buffer containing the code to decode</param> + /// <param name="length">The length of the buffer</param> + public StringInstructionDecoder(byte[] codeBuffer, int length) + { + _codeBuffer = codeBuffer; + _length = length; + } + + /// <summary> + /// Checks if the opcode is one of the string instructions handled here: 0xA4, 0xA5, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE or 0xAF. NOTE(review): 0xA6/0xA7 (CMPS) are not matched - confirm that is intended + /// </summary> + /// <param name="opcode">The opcode to check</param> + /// <returns>True if the opcode is a string instruction</returns> + public bool IsStringInstruction(byte opcode) + { + return opcode == 0xA4 || opcode == 0xA5 || // MOVS + opcode == 0xAA || opcode == 0xAB || // STOS + opcode == 0xAC || opcode == 0xAD || // LODS + opcode == 0xAE || opcode == 0xAF; // SCAS + } + + /// <summary> + /// Creates an instruction for a string operation with REP/REPNE prefix; the mnemonic gets "rep" for prefix 0xF3 and "repne" for any other prefix value + /// </summary> + /// <param name="prefix">The REP/REPNE prefix (0xF2 or 0xF3)</param> + /// <param name="stringOp">The string operation opcode</param> + /// <param name="startPosition">The start position of the instruction</param> + /// <param name="currentPosition">The current position after reading the string opcode</param> + /// <returns>The created instruction</returns> + public 
Instruction CreateStringInstruction(byte prefix, byte stringOp, int startPosition, int currentPosition) + { + // Create a new instruction + Instruction instruction = new Instruction + { + Address = (uint)startPosition, + }; + + // Get the mnemonic for the string operation + string mnemonic = OpcodeMap.GetMnemonic(stringOp); + instruction.Mnemonic = prefix == 0xF3 ? 
$"rep {mnemonic}" : $"repne {mnemonic}"; + + // Set operands based on the string operation + instruction.Operands = GetStringOperands(stringOp); + + // Copy the bytes from startPosition up to (not including) currentPosition as the raw bytes + int length = currentPosition - startPosition; + instruction.RawBytes = new byte[length]; + Array.Copy(_codeBuffer, startPosition, instruction.RawBytes, 0, length); + + return instruction; + } + + /// <summary> + /// Gets the fixed operand text for a string instruction opcode + /// </summary> + /// <param name="stringOp">The string operation opcode</param> + /// <returns>The operands string, or "??" when the opcode has no case in the switch</returns> + private string GetStringOperands(byte stringOp) + { + switch (stringOp) + { + case 0xA4: // MOVSB + return "byte ptr [edi], byte ptr [esi]"; + case 0xA5: // MOVSD + return "dword ptr [edi], dword ptr [esi]"; + case 0xAA: // STOSB + return "byte ptr [edi], al"; + case 0xAB: // STOSD + return "dword ptr [edi], eax"; + case 0xAC: // LODSB + return "al, byte ptr [esi]"; + case 0xAD: // LODSD + return "eax, dword ptr [esi]"; + case 0xAE: // SCASB + return "al, byte ptr [edi]"; + case 0xAF: // SCASD + return "eax, dword ptr [edi]"; + default: + return "??"; + } + } +}