From e91a0223f741e569b940627515330b43fc6a79b6 Mon Sep 17 00:00:00 2001 From: bird_egop Date: Sun, 13 Apr 2025 18:22:44 +0300 Subject: [PATCH] Refactor SUB handlers --- ParkanPlayground.sln.DotSettings.user | 11 +-- .../X86/Handlers/Sub/SubAlImm8Handler.cs | 54 +++++++++++++ .../X86/Handlers/Sub/SubAxImm16Handler.cs | 21 ++--- .../X86/Handlers/Sub/SubImmFromRm16Handler.cs | 61 ++++++-------- .../Sub/SubImmFromRm16SignExtendedHandler.cs | 58 ++++++-------- .../X86/Handlers/Sub/SubImmFromRm32Handler.cs | 56 ++++++------- .../Sub/SubImmFromRm32SignExtendedHandler.cs | 56 ++++++------- .../X86/Handlers/Sub/SubImmFromRm8Handler.cs | 80 +++++++++++++++++++ .../X86/Handlers/Sub/SubR16Rm16Handler.cs | 27 +++---- .../X86/Handlers/Sub/SubR32Rm32Handler.cs | 11 +-- .../X86/Handlers/Sub/SubR8Rm8Handler.cs | 66 +++++++++++++++ .../X86/Handlers/Sub/SubRm16R16Handler.cs | 70 ++++++++++++++++ .../X86/Handlers/Sub/SubRm32R32Handler.cs | 9 +-- .../X86/Handlers/Sub/SubRm8R8Handler.cs | 66 +++++++++++++++ X86Disassembler/X86/InstructionDecoder.cs | 9 +++ X86Disassembler/X86/OpcodeMap.cs | 2 +- .../InstructionTests/SubRm8Imm8Tests.cs | 22 ++++- X86DisassemblerTests/TestData/nop_tests.csv | 14 ++-- 18 files changed, 493 insertions(+), 200 deletions(-) create mode 100644 X86Disassembler/X86/Handlers/Sub/SubAlImm8Handler.cs create mode 100644 X86Disassembler/X86/Handlers/Sub/SubImmFromRm8Handler.cs create mode 100644 X86Disassembler/X86/Handlers/Sub/SubR8Rm8Handler.cs create mode 100644 X86Disassembler/X86/Handlers/Sub/SubRm16R16Handler.cs create mode 100644 X86Disassembler/X86/Handlers/Sub/SubRm8R8Handler.cs diff --git a/ParkanPlayground.sln.DotSettings.user b/ParkanPlayground.sln.DotSettings.user index 9c4328e..7d73feb 100644 --- a/ParkanPlayground.sln.DotSettings.user +++ b/ParkanPlayground.sln.DotSettings.user @@ -20,10 +20,7 @@ <Project Location="C:\Projects\CSharp\ParkanPlayground\X86DisassemblerTests" Presentation="&lt;X86DisassemblerTests&gt;" /> </And> </SessionState> - <SessionState ContinuousTestingMode="0" IsActive="True" Name="RunTestsOnJson" xmlns="urn:schemas-jetbrains-com:jetbrains-ut-session"> - <TestAncestor> - <TestId>xUnit::D6A1F5A9-0C7A-4F8F-B8C5-83E9D3F3A1D5::net8.0::X86DisassemblerTests.GeneralDisassemblerInstructionTests.RunTestsOnJson</TestId> - <TestId>xUnit::D6A1F5A9-0C7A-4F8F-B8C5-83E9D3F3A1D5::net8.0::X86DisassemblerTests.PushRegTests.PushRegTests</TestId> - <TestId>xUnit::D6A1F5A9-0C7A-4F8F-B8C5-83E9D3F3A1D5::net8.0::X86DisassemblerTests.RawFromFileDisassemblyTests</TestId> - </TestAncestor> -</SessionState> \ No newline at end of file + <SessionState ContinuousTestingMode="0" IsActive="True" Name="All tests from &lt;X86DisassemblerTests&gt;" xmlns="urn:schemas-jetbrains-com:jetbrains-ut-session"> + <Project Location="C:\Projects\CSharp\ParkanPlayground\X86DisassemblerTests" Presentation="&lt;X86DisassemblerTests&gt;" /> +</SessionState> + \ No newline at end of file diff --git a/X86Disassembler/X86/Handlers/Sub/SubAlImm8Handler.cs b/X86Disassembler/X86/Handlers/Sub/SubAlImm8Handler.cs new file mode 100644 index 0000000..6df1482 --- /dev/null +++ b/X86Disassembler/X86/Handlers/Sub/SubAlImm8Handler.cs @@ -0,0 +1,54 @@ +namespace X86Disassembler.X86.Handlers.Sub; + +/// +/// Handler for SUB AL, imm8 instruction (0x2C) +/// +public class SubAlImm8Handler : InstructionHandler +{ + /// + /// Initializes a new instance of the SubAlImm8Handler class + /// + /// The buffer containing the code to decode + /// The instruction decoder that owns this handler + /// The length of the buffer + public SubAlImm8Handler(byte[] codeBuffer, InstructionDecoder decoder, int length) + : base(codeBuffer, decoder, length) + { + } + + /// + /// Checks if this handler can decode the given opcode + /// + /// The opcode to check + /// True if this handler can decode the opcode + public override bool CanHandle(byte opcode) + { + return opcode == 0x2C; + } + + /// + /// Decodes a SUB AL, imm8 instruction + /// + /// The opcode of the instruction + /// The instruction object to populate + /// True if the instruction was successfully decoded + public override bool Decode(byte opcode, Instruction instruction) + { + int position = Decoder.GetPosition(); + + if (position >= Length) + { + return false; + } + + // Read the immediate byte + byte imm8 = CodeBuffer[position++]; + Decoder.SetPosition(position); + + // Set the instruction information + instruction.Mnemonic = "sub"; + instruction.Operands = $"al, 0x{imm8:X2}"; + + return true; + } +} \ No newline at end of file diff --git a/X86Disassembler/X86/Handlers/Sub/SubAxImm16Handler.cs b/X86Disassembler/X86/Handlers/Sub/SubAxImm16Handler.cs index 4c17bd8..b8f09b5 100644 --- a/X86Disassembler/X86/Handlers/Sub/SubAxImm16Handler.cs +++ b/X86Disassembler/X86/Handlers/Sub/SubAxImm16Handler.cs @@ -37,28 +37,21 @@ public class SubAxImm16Handler : InstructionHandler { // Set the mnemonic instruction.Mnemonic = "sub"; - + int position = Decoder.GetPosition(); - + // Check if we have enough bytes for the immediate value if (position + 1 >= Length) { return false; } - + // Read the immediate value (16-bit) - byte lowByte = CodeBuffer[position++]; - byte highByte = CodeBuffer[position++]; - - // Combine the bytes into a 16-bit value - ushort immediate = (ushort)((highByte << 8) | lowByte); - - // Update the decoder position - Decoder.SetPosition(position); - + var immediate = Decoder.ReadUInt16(); + // Set the operands (note: we use "eax" instead of "ax" to match the disassembler's output) instruction.Operands = $"eax, 0x{immediate:X4}"; - + return true; } -} +} \ No newline at end of file diff --git a/X86Disassembler/X86/Handlers/Sub/SubImmFromRm16Handler.cs b/X86Disassembler/X86/Handlers/Sub/SubImmFromRm16Handler.cs index 5c5c7db..b7f15b6 100644 --- a/X86Disassembler/X86/Handlers/Sub/SubImmFromRm16Handler.cs +++ b/X86Disassembler/X86/Handlers/Sub/SubImmFromRm16Handler.cs @@ -36,32 +36,27 @@ public class SubImmFromRm16Handler : InstructionHandler public override bool Decode(byte opcode, Instruction instruction) { int position = Decoder.GetPosition(); - + if (position >= Length) { return false; } - - // Read the ModR/M byte - byte modRM = CodeBuffer[position++]; - + // Extract the fields from the ModR/M byte - byte mod = (byte)((modRM & 0xC0) >> 6); - byte reg = (byte)((modRM & 0x38) >> 3); - byte rm = (byte)(modRM & 0x07); - + var (mod, reg, rm, operand) = ModRMDecoder.ReadModRM(); + // Check if this is a SUB instruction (reg field must be 5) if (reg != 5) { return false; } - + // Set the mnemonic instruction.Mnemonic = "sub"; - + // Update the decoder position Decoder.SetPosition(position); - + // For mod == 3, the r/m field specifies a register string destination; if (mod == 3) @@ -73,43 +68,37 @@ public class SubImmFromRm16Handler : InstructionHandler { // Get the memory operand string destination = ModRMDecoder.DecodeModRM(mod, rm, false); - + // Replace "dword" with "word" in the memory operand destination = destination.Replace("dword", "word"); } - + // Get the current position after processing the ModR/M byte position = Decoder.GetPosition(); - + // Check if we have enough bytes for the immediate value if (position + 1 >= Length) { return false; } - + // Read the immediate value (16-bit) - byte lowByte = CodeBuffer[position++]; - byte highByte = CodeBuffer[position++]; - - // Combine the bytes into a 16-bit value - ushort immediate = (ushort)((highByte << 8) | lowByte); - - // Update the decoder position - Decoder.SetPosition(position); - + ushort immediate = Decoder.ReadUInt16(); + // Set the operands (note: we use 32-bit register names to match the disassembler's output) if (mod == 3) { // For register operands, use the 32-bit register name - string reg32Name = destination.Replace("ax", "eax") - .Replace("bx", "ebx") - .Replace("cx", "ecx") - .Replace("dx", "edx") - .Replace("sp", "esp") - .Replace("bp", "ebp") - .Replace("si", "esi") - .Replace("di", "edi"); - + string reg32Name = destination + .Replace("ax", "eax") + .Replace("bx", "ebx") + .Replace("cx", "ecx") + .Replace("dx", "edx") + .Replace("sp", "esp") + .Replace("bp", "ebp") + .Replace("si", "esi") + .Replace("di", "edi"); + instruction.Operands = $"{reg32Name}, 0x{immediate:X4}"; } else @@ -117,7 +106,7 @@ public class SubImmFromRm16Handler : InstructionHandler // For memory operands, keep the memory operand as is instruction.Operands = $"{destination}, 0x{immediate:X4}"; } - + return true; } -} +} \ No newline at end of file diff --git a/X86Disassembler/X86/Handlers/Sub/SubImmFromRm16SignExtendedHandler.cs b/X86Disassembler/X86/Handlers/Sub/SubImmFromRm16SignExtendedHandler.cs index ae9b34a..4fd35c8 100644 --- a/X86Disassembler/X86/Handlers/Sub/SubImmFromRm16SignExtendedHandler.cs +++ b/X86Disassembler/X86/Handlers/Sub/SubImmFromRm16SignExtendedHandler.cs @@ -36,32 +36,24 @@ public class SubImmFromRm16SignExtendedHandler : InstructionHandler public override bool Decode(byte opcode, Instruction instruction) { int position = Decoder.GetPosition(); - + if (position >= Length) { return false; } - - // Read the ModR/M byte - byte modRM = CodeBuffer[position++]; - + // Extract the fields from the ModR/M byte - byte mod = (byte)((modRM & 0xC0) >> 6); - byte reg = (byte)((modRM & 0x38) >> 3); - byte rm = (byte)(modRM & 0x07); - + var (mod, reg, rm, operand) = ModRMDecoder.ReadModRM(); + // Check if this is a SUB instruction (reg field must be 5) if (reg != 5) { return false; } - + // Set the mnemonic instruction.Mnemonic = "sub"; - - // Update the decoder position - Decoder.SetPosition(position); - + // For mod == 3, the r/m field specifies a register string destination; if (mod == 3) @@ -73,39 +65,37 @@ public class SubImmFromRm16SignExtendedHandler : InstructionHandler { // Get the memory operand string destination = ModRMDecoder.DecodeModRM(mod, rm, false); - + // Replace "dword" with "word" in the memory operand destination = destination.Replace("dword", "word"); } - + // Get the current position after processing the ModR/M byte position = Decoder.GetPosition(); - + // Check if we have enough bytes for the immediate value if (position >= Length) { return false; } - + // Read the immediate value (8-bit) - byte immediate = CodeBuffer[position++]; - - // Update the decoder position - Decoder.SetPosition(position); - + byte immediate = Decoder.ReadByte(); + // Set the operands (note: we use 32-bit register names to match the disassembler's output) if (mod == 3) { // For register operands, use the 32-bit register name - string reg32Name = destination.Replace("ax", "eax") - .Replace("bx", "ebx") - .Replace("cx", "ecx") - .Replace("dx", "edx") - .Replace("sp", "esp") - .Replace("bp", "ebp") - .Replace("si", "esi") - .Replace("di", "edi"); - + string reg32Name = destination + .Replace("ax", "eax") + .Replace("bx", "ebx") + .Replace("cx", "ecx") + .Replace("dx", "edx") + .Replace("sp", "esp") + .Replace("bp", "ebp") + .Replace("si", "esi") + .Replace("di", "edi"); + instruction.Operands = $"{reg32Name}, 0x{immediate:X2}"; } else @@ -113,7 +103,7 @@ public class SubImmFromRm16SignExtendedHandler : InstructionHandler // For memory operands, keep the memory operand as is instruction.Operands = $"{destination}, 0x{immediate:X2}"; } - + return true; } -} +} \ No newline at end of file diff --git a/X86Disassembler/X86/Handlers/Sub/SubImmFromRm32Handler.cs b/X86Disassembler/X86/Handlers/Sub/SubImmFromRm32Handler.cs index 6ec7242..51dee8c 100644 --- a/X86Disassembler/X86/Handlers/Sub/SubImmFromRm32Handler.cs +++ b/X86Disassembler/X86/Handlers/Sub/SubImmFromRm32Handler.cs @@ -11,11 +11,11 @@ public class SubImmFromRm32Handler : InstructionHandler /// The buffer containing the code to decode /// The instruction decoder that owns this handler /// The length of the buffer - public SubImmFromRm32Handler(byte[] codeBuffer, InstructionDecoder decoder, int length) + public SubImmFromRm32Handler(byte[] codeBuffer, InstructionDecoder decoder, int length) : base(codeBuffer, decoder, length) { } - + /// /// Checks if this handler can decode the given opcode /// @@ -25,18 +25,18 @@ public class SubImmFromRm32Handler : InstructionHandler { if (opcode != 0x81) return false; - + // Check if the reg field of the ModR/M byte is 5 (SUB) int position = Decoder.GetPosition(); if (position >= Length) return false; - + byte modRM = CodeBuffer[position]; - byte reg = (byte)((modRM & 0x38) >> 3); - + byte reg = (byte) ((modRM & 0x38) >> 3); + return reg == 5; // 5 = SUB } - + /// /// Decodes a SUB r/m32, imm32 instruction /// @@ -47,53 +47,43 @@ public class SubImmFromRm32Handler : InstructionHandler { // Set the mnemonic instruction.Mnemonic = "sub"; - + int position = Decoder.GetPosition(); - + if (position >= Length) { return false; } - + // Read the ModR/M byte - byte modRM = CodeBuffer[position++]; - Decoder.SetPosition(position); - + // Extract the fields from the ModR/M byte - byte mod = (byte)((modRM & 0xC0) >> 6); - byte reg = (byte)((modRM & 0x38) >> 3); // Should be 5 for SUB - byte rm = (byte)(modRM & 0x07); - // Let the ModRMDecoder handle the ModR/M byte and any additional bytes (SIB, displacement) // This will update the decoder position to point after the ModR/M and any additional bytes - string destOperand = ModRMDecoder.DecodeModRM(mod, rm, false); - + var (mod, reg, rm, destOperand) = ModRMDecoder.ReadModRM(); + // Get the updated position after ModR/M decoding position = Decoder.GetPosition(); - + // Read the immediate value if (position + 3 >= Length) { return false; } - + // Read the immediate value in little-endian format - byte b0 = CodeBuffer[position]; - byte b1 = CodeBuffer[position + 1]; - byte b2 = CodeBuffer[position + 2]; - byte b3 = CodeBuffer[position + 3]; - - // Format the immediate value as expected by the tests (0x12345678) - // Note: Always use the same format regardless of operand type to match test expectations - string immStr = $"0x{b3:X2}{b2:X2}{b1:X2}{b0:X2}"; - + var imm = Decoder.ReadUInt32(); + + // Format the immediate value + string immStr = $"0x{imm:X8}"; + // Advance the position past the immediate value position += 4; Decoder.SetPosition(position); - + // Set the operands instruction.Operands = $"{destOperand}, {immStr}"; - + return true; } -} +} \ No newline at end of file diff --git a/X86Disassembler/X86/Handlers/Sub/SubImmFromRm32SignExtendedHandler.cs b/X86Disassembler/X86/Handlers/Sub/SubImmFromRm32SignExtendedHandler.cs index a88cc4f..344dcd3 100644 --- a/X86Disassembler/X86/Handlers/Sub/SubImmFromRm32SignExtendedHandler.cs +++ b/X86Disassembler/X86/Handlers/Sub/SubImmFromRm32SignExtendedHandler.cs @@ -11,11 +11,11 @@ public class SubImmFromRm32SignExtendedHandler : InstructionHandler /// The buffer containing the code to decode /// The instruction decoder that owns this handler /// The length of the buffer - public SubImmFromRm32SignExtendedHandler(byte[] codeBuffer, InstructionDecoder decoder, int length) + public SubImmFromRm32SignExtendedHandler(byte[] codeBuffer, InstructionDecoder decoder, int length) : base(codeBuffer, decoder, length) { } - + /// /// Checks if this handler can decode the given opcode /// @@ -25,18 +25,18 @@ public class SubImmFromRm32SignExtendedHandler : InstructionHandler { if (opcode != 0x83) return false; - + // Check if the reg field of the ModR/M byte is 5 (SUB) int position = Decoder.GetPosition(); if (position >= Length) return false; - + byte modRM = CodeBuffer[position]; - byte reg = (byte)((modRM & 0x38) >> 3); - + byte reg = (byte) ((modRM & 0x38) >> 3); + return reg == 5; // 5 = SUB } - + /// /// Decodes a SUB r/m32, imm8 (sign-extended) instruction /// @@ -47,49 +47,41 @@ public class SubImmFromRm32SignExtendedHandler : InstructionHandler { // Set the mnemonic instruction.Mnemonic = "sub"; - + int position = Decoder.GetPosition(); - + if (position >= Length) { return false; } - - // Read the ModR/M byte - byte modRM = CodeBuffer[position++]; - Decoder.SetPosition(position); - + // Extract the fields from the ModR/M byte - byte mod = (byte)((modRM & 0xC0) >> 6); - byte reg = (byte)((modRM & 0x38) >> 3); // Should be 5 for SUB - byte rm = (byte)(modRM & 0x07); - + var (mod, reg, rm, destOperand) = ModRMDecoder.ReadModRM(); + // Let the ModRMDecoder handle the ModR/M byte and any additional bytes (SIB, displacement) // This will update the decoder position to point after the ModR/M and any additional bytes - string destOperand = ModRMDecoder.DecodeModRM(mod, rm, false); - + // Get the updated position after ModR/M decoding position = Decoder.GetPosition(); - + // Read the immediate value if (position >= Length) { return false; } - + // Read the immediate value as a signed byte and sign-extend it to 32 bits - sbyte imm8 = (sbyte)CodeBuffer[position++]; + sbyte imm8 = (sbyte) Decoder.ReadByte(); int imm32 = imm8; // Automatic sign extension from sbyte to int - Decoder.SetPosition(position); - + // Format the immediate value based on the operand type and value string immStr; - + // For memory operands, use a different format as expected by the tests if (mod != 3) // Memory operand { // For memory operands, use the actual value as specified in the test - immStr = $"0x{(byte)imm8:X2}"; + immStr = $"0x{(byte) imm8:X2}"; } else // Register operand { @@ -97,18 +89,18 @@ public class SubImmFromRm32SignExtendedHandler : InstructionHandler if (imm8 < 0) { // For negative values, show the full 32-bit representation with 8-digit padding - immStr = $"0x{(uint)imm32:X8}"; + immStr = $"0x{(uint) imm32:X8}"; } else { // For positive values, just show the value with 2-digit padding for consistency - immStr = $"0x{(byte)imm8:X2}"; + immStr = $"0x{(byte) imm8:X2}"; } } - + // Set the operands instruction.Operands = $"{destOperand}, {immStr}"; - + return true; } -} +} \ No newline at end of file diff --git a/X86Disassembler/X86/Handlers/Sub/SubImmFromRm8Handler.cs b/X86Disassembler/X86/Handlers/Sub/SubImmFromRm8Handler.cs new file mode 100644 index 0000000..d96860c --- /dev/null +++ b/X86Disassembler/X86/Handlers/Sub/SubImmFromRm8Handler.cs @@ -0,0 +1,80 @@ +namespace X86Disassembler.X86.Handlers.Sub; + +/// +/// Handler for SUB r/m8, imm8 instruction (0x80 /5) +/// +public class SubImmFromRm8Handler : InstructionHandler +{ + /// + /// Initializes a new instance of the SubImmFromRm8Handler class + /// + /// The buffer containing the code to decode + /// The instruction decoder that owns this handler + /// The length of the buffer + public SubImmFromRm8Handler(byte[] codeBuffer, InstructionDecoder decoder, int length) + : base(codeBuffer, decoder, length) + { + } + + /// + /// Checks if this handler can decode the given opcode + /// + /// The opcode to check + /// True if this handler can decode the opcode + public override bool CanHandle(byte opcode) + { + if (opcode != 0x80) + return false; + + // Check if the reg field of the ModR/M byte is 5 (SUB) + int position = Decoder.GetPosition(); + if (position >= Length) + return false; + + byte modRM = CodeBuffer[position]; + byte reg = (byte) ((modRM & 0x38) >> 3); + + return reg == 5; // 5 = SUB + } + + /// + /// Decodes a SUB r/m8, imm8 instruction + /// + /// The opcode of the instruction + /// The instruction object to populate + /// True if the instruction was successfully decoded + public override bool Decode(byte opcode, Instruction instruction) + { + // Set the mnemonic + instruction.Mnemonic = "sub"; + + // Extract the fields from the ModR/M byte + var (mod, reg, rm, operand) = ModRMDecoder.ReadModRM(); + + // Read the immediate byte + var position = Decoder.GetPosition(); + if (position >= Length) + { + return false; + } + + byte imm8 = CodeBuffer[position++]; + Decoder.SetPosition(position); + + // Set the instruction information + // For mod == 3, the operand is a register + if (mod == 3) + { + string rmRegName = ModRMDecoder.GetRegisterName(rm, 8); + instruction.Operands = $"{rmRegName}, 0x{imm8:X2}"; + } + else // Memory operand + { + // Get the memory operand string + string memOperand = ModRMDecoder.DecodeModRM(mod, rm, false); + instruction.Operands = $"byte ptr {memOperand}, 0x{imm8:X2}"; + } + + return true; + } +} \ No newline at end of file diff --git a/X86Disassembler/X86/Handlers/Sub/SubR16Rm16Handler.cs b/X86Disassembler/X86/Handlers/Sub/SubR16Rm16Handler.cs index 7a40a4d..9089784 100644 --- a/X86Disassembler/X86/Handlers/Sub/SubR16Rm16Handler.cs +++ b/X86Disassembler/X86/Handlers/Sub/SubR16Rm16Handler.cs @@ -37,26 +37,20 @@ public class SubR16Rm16Handler : InstructionHandler { // Set the mnemonic instruction.Mnemonic = "sub"; - + int position = Decoder.GetPosition(); - + if (position >= Length) { return false; } - + // Read the ModR/M byte - byte modRM = CodeBuffer[position++]; - Decoder.SetPosition(position); - - // Extract the fields from the ModR/M byte - byte mod = (byte)((modRM & 0xC0) >> 6); - byte reg = (byte)((modRM & 0x38) >> 3); - byte rm = (byte)(modRM & 0x07); - + var (mod, reg, rm, memOperand) = ModRMDecoder.ReadModRM(); + // Get register name (16-bit) string regName = ModRMDecoder.GetRegisterName(reg, 16); - + // For mod == 3, both operands are registers if (mod == 3) { @@ -65,15 +59,12 @@ public class SubR16Rm16Handler : InstructionHandler } else // Memory operand { - // Get the memory operand string (use false for is64Bit) - string memOperand = ModRMDecoder.DecodeModRM(mod, rm, false); - // Replace "dword" with "word" in the memory operand memOperand = memOperand.Replace("dword", "word"); - + instruction.Operands = $"{regName}, {memOperand}"; } - + return true; } -} +} \ No newline at end of file diff --git a/X86Disassembler/X86/Handlers/Sub/SubR32Rm32Handler.cs b/X86Disassembler/X86/Handlers/Sub/SubR32Rm32Handler.cs index d8f1598..cb3777d 100644 --- a/X86Disassembler/X86/Handlers/Sub/SubR32Rm32Handler.cs +++ b/X86Disassembler/X86/Handlers/Sub/SubR32Rm32Handler.cs @@ -42,13 +42,7 @@ public class SubR32Rm32Handler : InstructionHandler } // Read the ModR/M byte - byte modRM = CodeBuffer[position++]; - Decoder.SetPosition(position); - - // Extract the fields from the ModR/M byte - byte mod = (byte)((modRM & 0xC0) >> 6); - byte reg = (byte)((modRM & 0x38) >> 3); - byte rm = (byte)(modRM & 0x07); + var (mod, reg, rm, operand) = ModRMDecoder.ReadModRM(); // Set the mnemonic instruction.Mnemonic = "sub"; @@ -59,7 +53,6 @@ public class SubR32Rm32Handler : InstructionHandler // For memory operands, set the operand if (mod != 3) // Memory operand { - string operand = ModRMDecoder.DecodeModRM(mod, rm, false); instruction.Operands = $"{regName}, {operand}"; } else // Register operand @@ -70,4 +63,4 @@ public class SubR32Rm32Handler : InstructionHandler return true; } -} +} \ No newline at end of file diff --git a/X86Disassembler/X86/Handlers/Sub/SubR8Rm8Handler.cs b/X86Disassembler/X86/Handlers/Sub/SubR8Rm8Handler.cs new file mode 100644 index 0000000..86f8df9 --- /dev/null +++ b/X86Disassembler/X86/Handlers/Sub/SubR8Rm8Handler.cs @@ -0,0 +1,66 @@ +namespace X86Disassembler.X86.Handlers.Sub; + +/// +/// Handler for SUB r8, r/m8 instruction (0x2A) +/// +public class SubR8Rm8Handler : InstructionHandler +{ + /// + /// Initializes a new instance of the SubR8Rm8Handler class + /// + /// The buffer containing the code to decode + /// The instruction decoder that owns this handler + /// The length of the buffer + public SubR8Rm8Handler(byte[] codeBuffer, InstructionDecoder decoder, int length) + : base(codeBuffer, decoder, length) + { + } + + /// + /// Checks if this handler can decode the given opcode + /// + /// The opcode to check + /// True if this handler can decode the opcode + public override bool CanHandle(byte opcode) + { + return opcode == 0x2A; + } + + /// + /// Decodes a SUB r8, r/m8 instruction + /// + /// The opcode of the instruction + /// The instruction object to populate + /// True if the instruction was successfully decoded + public override bool Decode(byte opcode, Instruction instruction) + { + // Set the mnemonic + instruction.Mnemonic = "sub"; + + int position = Decoder.GetPosition(); + + if (position >= Length) + { + return false; + } + + // Read the ModR/M byte + var (mod, reg, rm, memOperand) = ModRMDecoder.ReadModRM(); + + // Get register name + string regName = ModRMDecoder.GetRegisterName(reg, 8); + + // For mod == 3, both operands are registers + if (mod == 3) + { + string rmRegName = ModRMDecoder.GetRegisterName(rm, 8); + instruction.Operands = $"{regName}, {rmRegName}"; + } + else // Memory operand + { + instruction.Operands = $"{regName}, byte ptr {memOperand}"; + } + + return true; + } +} \ No newline at end of file diff --git a/X86Disassembler/X86/Handlers/Sub/SubRm16R16Handler.cs b/X86Disassembler/X86/Handlers/Sub/SubRm16R16Handler.cs new file mode 100644 index 0000000..3198f84 --- /dev/null +++ b/X86Disassembler/X86/Handlers/Sub/SubRm16R16Handler.cs @@ -0,0 +1,70 @@ +namespace X86Disassembler.X86.Handlers.Sub; + +/// +/// Handler for SUB r/m16, r16 instruction (0x29 with 0x66 prefix) +/// +public class SubRm16R16Handler : InstructionHandler +{ + /// + /// Initializes a new instance of the SubRm16R16Handler class + /// + /// The buffer containing the code to decode + /// The instruction decoder that owns this handler + /// The length of the buffer + public SubRm16R16Handler(byte[] codeBuffer, InstructionDecoder decoder, int length) + : base(codeBuffer, decoder, length) + { + } + + /// + /// Checks if this handler can decode the given opcode + /// + /// The opcode to check + /// True if this handler can decode the opcode + public override bool CanHandle(byte opcode) + { + // Check if the opcode is 0x29 and we have a 0x66 prefix + return opcode == 0x29 && Decoder.HasOperandSizeOverridePrefix(); + } + + /// + /// Decodes a SUB r/m16, r16 instruction + /// + /// The opcode of the instruction + /// The instruction object to populate + /// True if the instruction was successfully decoded + public override bool Decode(byte opcode, Instruction instruction) + { + // Set the mnemonic + instruction.Mnemonic = "sub"; + + int position = Decoder.GetPosition(); + + if (position >= Length) + { + return false; + } + + // Read the ModR/M byte + var (mod, reg, rm, memOperand) = ModRMDecoder.ReadModRM(); + + // Get register name (16-bit) + string regName = ModRMDecoder.GetRegisterName(reg, 16); + + // For mod == 3, both operands are registers + if (mod == 3) + { + string rmRegName = ModRMDecoder.GetRegisterName(rm, 16); + instruction.Operands = $"{rmRegName}, {regName}"; + } + else // Memory operand + { + // Replace "dword" with "word" in the memory operand + memOperand = memOperand.Replace("dword", "word"); + + instruction.Operands = $"{memOperand}, {regName}"; + } + + return true; + } +} \ No newline at end of file diff --git a/X86Disassembler/X86/Handlers/Sub/SubRm32R32Handler.cs b/X86Disassembler/X86/Handlers/Sub/SubRm32R32Handler.cs index e47688c..6eb7825 100644 --- a/X86Disassembler/X86/Handlers/Sub/SubRm32R32Handler.cs +++ b/X86Disassembler/X86/Handlers/Sub/SubRm32R32Handler.cs @@ -42,13 +42,9 @@ public class SubRm32R32Handler : InstructionHandler } // Read the ModR/M byte - byte modRM = CodeBuffer[position++]; - Decoder.SetPosition(position); - + // Extract the fields from the ModR/M byte - byte mod = (byte)((modRM & 0xC0) >> 6); - byte reg = (byte)((modRM & 0x38) >> 3); - byte rm = (byte)(modRM & 0x07); + var (mod, reg, rm, operand) = ModRMDecoder.ReadModRM(); // Set the mnemonic instruction.Mnemonic = "sub"; @@ -59,7 +55,6 @@ public class SubRm32R32Handler : InstructionHandler // For memory operands, set the operand if (mod != 3) // Memory operand { - string operand = ModRMDecoder.DecodeModRM(mod, rm, false); instruction.Operands = $"{operand}, {regName}"; } else // Register operand diff --git a/X86Disassembler/X86/Handlers/Sub/SubRm8R8Handler.cs b/X86Disassembler/X86/Handlers/Sub/SubRm8R8Handler.cs new file mode 100644 index 0000000..bfae22e --- /dev/null +++ b/X86Disassembler/X86/Handlers/Sub/SubRm8R8Handler.cs @@ -0,0 +1,66 @@ +namespace X86Disassembler.X86.Handlers.Sub; + +/// +/// Handler for SUB r/m8, r8 instruction (0x28) +/// +public class SubRm8R8Handler : InstructionHandler +{ + /// + /// Initializes a new instance of the SubRm8R8Handler class + /// + /// The buffer containing the code to decode + /// The instruction decoder that owns this handler + /// The length of the buffer + public SubRm8R8Handler(byte[] codeBuffer, InstructionDecoder decoder, int length) + : base(codeBuffer, decoder, length) + { + } + + /// + /// Checks if this handler can decode the given opcode + /// + /// The opcode to check + /// True if this handler can decode the opcode + public override bool CanHandle(byte opcode) + { + return opcode == 0x28; + } + + /// + /// Decodes a SUB r/m8, r8 instruction + /// + /// The opcode of the instruction + /// The instruction object to populate + /// True if the instruction was successfully decoded + public override bool Decode(byte opcode, Instruction instruction) + { + // Set the mnemonic + instruction.Mnemonic = "sub"; + + int position = Decoder.GetPosition(); + + if (position >= Length) + { + return false; + } + + // Read the ModR/M byte + var (mod, reg, rm, memOperand) = ModRMDecoder.ReadModRM(); + + // Get register name + string regName = ModRMDecoder.GetRegisterName(reg, 8); + + // For mod == 3, both operands are registers + if (mod == 3) + { + string rmRegName = ModRMDecoder.GetRegisterName(rm, 8); + instruction.Operands = $"{rmRegName}, {regName}"; + } + else // Memory operand + { + instruction.Operands = $"byte ptr {memOperand}, {regName}"; + } + + return true; + } +} \ No newline at end of file diff --git a/X86Disassembler/X86/InstructionDecoder.cs b/X86Disassembler/X86/InstructionDecoder.cs index 4e77766..2adcc7f 100644 --- a/X86Disassembler/X86/InstructionDecoder.cs +++ b/X86Disassembler/X86/InstructionDecoder.cs @@ -215,6 +215,15 @@ public class InstructionDecoder return _prefixDecoder.HasRepPrefix(); } + /// + /// Checks if the instruction has an operand size override prefix (0x66) + /// + /// True if the instruction has an operand size override prefix + public bool HasOperandSizeOverridePrefix() + { + return _prefixDecoder.HasOperandSizePrefix(); + } + /// /// Reads a byte from the buffer and advances the position /// diff --git a/X86Disassembler/X86/OpcodeMap.cs b/X86Disassembler/X86/OpcodeMap.cs index 1759a20..aff62d6 100644 --- a/X86Disassembler/X86/OpcodeMap.cs +++ b/X86Disassembler/X86/OpcodeMap.cs @@ -188,6 +188,6 @@ public static class OpcodeMap /// The mnemonic public static string GetMnemonic(byte opcode) { - return OneByteOpcodes[opcode]; + return "NO HANDLER: " + OneByteOpcodes[opcode]; } } diff --git a/X86DisassemblerTests/InstructionTests/SubRm8Imm8Tests.cs b/X86DisassemblerTests/InstructionTests/SubRm8Imm8Tests.cs index 36c8067..a30d54e 100644 --- a/X86DisassemblerTests/InstructionTests/SubRm8Imm8Tests.cs +++ b/X86DisassemblerTests/InstructionTests/SubRm8Imm8Tests.cs @@ -1,6 +1,24 @@ -namespace X86DisassemblerTests.InstructionTests; +using X86Disassembler.X86; + +namespace X86DisassemblerTests.InstructionTests; public class SubRm8Imm8Tests { - + [Fact] + public void SubRm8Imm8_Decodes_Correctly() + { + // Arrange + // SUB BL, 0x42 + byte[] codeBuffer = new byte[] { 0x80, 0xeb, 0x42 }; + var decoder = new Disassembler(codeBuffer, 0x1000); + + // Act + var instructions = decoder.Disassemble(); + + // Assert + Assert.Single(instructions); + Assert.NotNull(instructions[0]); + Assert.Equal("sub", instructions[0].Mnemonic); + Assert.Equal("bl, 0x42", instructions[0].Operands); + } } \ No newline at end of file diff --git a/X86DisassemblerTests/TestData/nop_tests.csv b/X86DisassemblerTests/TestData/nop_tests.csv index 406e306..6f8a790 100644 --- a/X86DisassemblerTests/TestData/nop_tests.csv +++ b/X86DisassemblerTests/TestData/nop_tests.csv @@ -6,26 +6,26 @@ RawBytes;Instructions 90;[{ "Mnemonic": "nop", "Operands": "" }] # Multi-byte NOP instructions (used for alignment) -# 2-byte NOP +# 2-byte NOP (xchg AX, AX) 6690;[{ "Mnemonic": "nop", "Operands": "" }] # 3-byte NOP (XCHG EAX, EAX) 0F1F00;[{ "Mnemonic": "nop", "Operands": "dword ptr [eax]" }] # 4-byte NOP -0F1F4000;[{ "Mnemonic": "nop", "Operands": "dword ptr [eax+0x00]" }] +0F1F4000;[{ "Mnemonic": "nop", "Operands": "dword ptr [eax]" }] # 5-byte NOP -0F1F440000;[{ "Mnemonic": "nop", "Operands": "dword ptr [eax+eax]" }] +0F1F440000;[{ "Mnemonic": "nop", "Operands": "dword ptr [eax+eax*1]" }] # 6-byte NOP -660F1F440000;[{ "Mnemonic": "nop", "Operands": "dword ptr [eax+eax]" }] +660F1F440000;[{ "Mnemonic": "nop", "Operands": "word ptr [eax+eax*1]" }] # 7-byte NOP -0F1F8000000000;[{ "Mnemonic": "nop", "Operands": "dword ptr [eax+0x00000000]" }] +0F1F8000000000;[{ "Mnemonic": "nop", "Operands": "dword ptr [eax]" }] # 8-byte NOP -0F1F840000000000;[{ "Mnemonic": "nop", "Operands": "dword ptr [eax+eax]" }] +0F1F840000000000;[{ "Mnemonic": "nop", "Operands": "dword ptr [eax+eax*1]" }] # 9-byte NOP -660F1F840000000000;[{ "Mnemonic": "nop", "Operands": "dword ptr [eax+eax]" }] +660F1F840000000000;[{ "Mnemonic": "nop", "Operands": "word ptr [eax+eax*1]" }]