From c14a92bf04c285e9da9542efc293691ad7a61704 Mon Sep 17 00:00:00 2001
From: bird_egop
Date: Sun, 13 Apr 2025 02:10:48 +0300
Subject: [PATCH] Added support for string instructions with REP prefix, specifically F3 A5 (REP MOVS)

---
 X86Disassembler/X86/InstructionDecoder.cs     | 85 ++++++++++++++++++-
 X86Disassembler/X86/OpcodeMap.cs              | 12 ++-
 .../StringInstructionHandlerTests.cs          | 32 +++++++
 3 files changed, 125 insertions(+), 4 deletions(-)
 create mode 100644 X86DisassemblerTests/StringInstructionHandlerTests.cs

diff --git a/X86Disassembler/X86/InstructionDecoder.cs b/X86Disassembler/X86/InstructionDecoder.cs
index fcff641..a8e7725 100644
--- a/X86Disassembler/X86/InstructionDecoder.cs
+++ b/X86Disassembler/X86/InstructionDecoder.cs
@@ -109,6 +109,23 @@ public class InstructionDecoder
                 {
                     _repPrefix = true;
                     _position++;
+
+                    // Special case: a string opcode directly after the prefix is decoded here as one instruction
+                    if (_position < _length)
+                    {
+                        byte stringOp = _codeBuffer[_position];
+                        if (stringOp == 0xA4 || stringOp == 0xA5 || // MOVS
+                            stringOp == 0xAA || stringOp == 0xAB || // STOS
+                            stringOp == 0xAC || stringOp == 0xAD || // LODS
+                            stringOp == 0xAE || stringOp == 0xAF)   // SCAS
+                        {
+                            // Consume the string opcode byte so it is counted in the instruction's raw bytes
+                            _position++;
+
+                            // Build the prefixed string instruction and return without further decoding
+                            return CreateStringInstruction(prefix, stringOp, startPosition);
+                        }
+                    }
                 }
                 else
                 {
@@ -157,6 +174,12 @@ public class InstructionDecoder
             instruction.Operands = "??";
         }
 
+        // Prepend "rep" when a REP prefix was seen and the mnemonic does not already start with it
+        if (_repPrefix && !instruction.Mnemonic.StartsWith("rep"))
+        {
+            instruction.Mnemonic = $"rep {instruction.Mnemonic}";
+        }
+
         // Add segment override prefix to the instruction if present
         if (_segmentOverridePrefix && !string.IsNullOrEmpty(instruction.Operands))
         {
@@ -169,9 +192,65 @@ public class InstructionDecoder
         }
 
         // Set the raw bytes
-        int instructionLength = _position - startPosition;
-        instruction.RawBytes = new byte[instructionLength];
-        Array.Copy(_codeBuffer, startPosition, instruction.RawBytes, 0, instructionLength);
+        int bytesLength = _position - startPosition;
+        instruction.RawBytes = new byte[bytesLength];
+        Array.Copy(_codeBuffer, startPosition, instruction.RawBytes, 0, bytesLength);
+
+        return instruction;
+    }
+
+    /// <summary>
+    /// Creates an instruction for a string operation with REP/REPNE prefix; the caller has already advanced _position past the opcode
+    /// </summary>
+    /// <param name="prefix">The prefix byte: 0xF3 yields a "rep" mnemonic, any other value yields "repne"</param>
+    /// <param name="stringOp">The string operation opcode, used to look up the mnemonic and select the operand text</param>
+    /// <param name="startPosition">The start position of the instruction; used as its address and as the first raw byte</param>
+    /// <returns>The created instruction with Address, Mnemonic, Operands (for the listed opcodes) and RawBytes set</returns>
+    private Instruction CreateStringInstruction(byte prefix, byte stringOp, int startPosition)
+    {
+        // The instruction's address is the start position passed in by the caller
+        Instruction instruction = new Instruction
+        {
+            Address = (uint)startPosition,
+        };
+
+        // Look up the base mnemonic, then prefix it: 0xF3 -> "rep", anything else -> "repne"
+        string mnemonic = OpcodeMap.GetMnemonic(stringOp);
+        instruction.Mnemonic = prefix == 0xF3 ? $"rep {mnemonic}" : $"repne {mnemonic}";
+
+        // Fixed operand text per opcode; opcodes not listed here leave Operands unassigned
+        switch (stringOp)
+        {
+            case 0xA4: // MOVSB
+                instruction.Operands = "byte ptr [edi], byte ptr [esi]";
+                break;
+            case 0xA5: // MOVSD
+                instruction.Operands = "dword ptr [edi], dword ptr [esi]";
+                break;
+            case 0xAA: // STOSB
+                instruction.Operands = "byte ptr [edi], al";
+                break;
+            case 0xAB: // STOSD
+                instruction.Operands = "dword ptr [edi], eax";
+                break;
+            case 0xAC: // LODSB
+                instruction.Operands = "al, byte ptr [esi]";
+                break;
+            case 0xAD: // LODSD
+                instruction.Operands = "eax, dword ptr [esi]";
+                break;
+            case 0xAE: // SCASB
+                instruction.Operands = "al, byte ptr [edi]";
+                break;
+            case 0xAF: // SCASD
+                instruction.Operands = "eax, dword ptr [edi]";
+                break;
+        }
+
+        // Raw bytes span from startPosition up to (not including) the current _position
+        int length = _position - startPosition;
+        instruction.RawBytes = new byte[length];
+        Array.Copy(_codeBuffer, startPosition, instruction.RawBytes, 0, length);
 
         return instruction;
     }
diff --git a/X86Disassembler/X86/OpcodeMap.cs b/X86Disassembler/X86/OpcodeMap.cs
index 55770c2..1759a20 100644
--- a/X86Disassembler/X86/OpcodeMap.cs
+++ b/X86Disassembler/X86/OpcodeMap.cs
@@ -118,6 +118,16 @@ public static class OpcodeMap
         OneByteOpcodes[0xA2] = "mov"; // MOV moffs8, AL
         OneByteOpcodes[0xA3] = "mov"; // MOV moffs32, EAX
 
+        // String instructions (the byte and dword forms share one mnemonic)
+        OneByteOpcodes[0xA4] = "movs"; // MOVS byte
+        OneByteOpcodes[0xA5] = "movs"; // MOVS dword
+        OneByteOpcodes[0xAA] = "stos"; // STOS byte
+        OneByteOpcodes[0xAB] = "stos"; // STOS dword
+        OneByteOpcodes[0xAC] = "lods"; // LODS byte
+        OneByteOpcodes[0xAD] = "lods"; // LODS dword
+        OneByteOpcodes[0xAE] = "scas"; // SCAS byte
+        OneByteOpcodes[0xAF] = "scas"; // SCAS dword
+
         // Control flow instructions
         OneByteOpcodes[0xCC] = "int3";
         OneByteOpcodes[0x90] = "nop";
@@ -178,6 +188,6 @@ public static class OpcodeMap
     /// <returns>The mnemonic</returns>
     public static string GetMnemonic(byte opcode)
     {
-        return "TODO UNKNOWN: " + OneByteOpcodes[opcode];
+        return OneByteOpcodes[opcode];
     }
 }
diff --git a/X86DisassemblerTests/StringInstructionHandlerTests.cs b/X86DisassemblerTests/StringInstructionHandlerTests.cs
new file mode 100644
index 0000000..8adce1f
--- /dev/null
+++ b/X86DisassemblerTests/StringInstructionHandlerTests.cs
@@ -0,0 +1,32 @@
+namespace X86DisassemblerTests;
+
+using System;
+using Xunit;
+using X86Disassembler.X86;
+using X86Disassembler.X86.Handlers.String;
+
+/// <summary>
+/// Tests for decoding of REP-prefixed string instructions
+/// </summary>
+public class StringInstructionHandlerTests
+{
+    /// <summary>
+    /// Verifies that InstructionDecoder decodes the bytes F3 A5 as "rep movs" with dword operands
+    /// </summary>
+    [Fact]
+    public void RepMovsHandler_DecodesRepMovs_Correctly()
+    {
+        // Arrange
+        // REP MOVS (F3 A5)
+        byte[] codeBuffer = new byte[] { 0xF3, 0xA5 };
+        var decoder = new InstructionDecoder(codeBuffer, codeBuffer.Length);
+
+        // Act
+        var instruction = decoder.DecodeInstruction();
+
+        // Assert
+        Assert.NotNull(instruction);
+        Assert.Equal("rep movs", instruction.Mnemonic);
+        Assert.Equal("dword ptr [edi], dword ptr [esi]", instruction.Operands);
+    }
+}