diff --git a/X86Disassembler/X86/Handlers/InstructionHandlerFactory.cs b/X86Disassembler/X86/Handlers/InstructionHandlerFactory.cs index f15b643..afd0689 100644 --- a/X86Disassembler/X86/Handlers/InstructionHandlerFactory.cs +++ b/X86Disassembler/X86/Handlers/InstructionHandlerFactory.cs @@ -374,9 +374,26 @@ public class InstructionHandlerFactory /// private void RegisterSubHandlers() { - // Add SUB register/memory handlers - _handlers.Add(new Sub.SubRm32R32Handler(_codeBuffer, _decoder, _length)); - _handlers.Add(new Sub.SubR32Rm32Handler(_codeBuffer, _decoder, _length)); + // Register SUB handlers + + // 32-bit handlers + _handlers.Add(new SubRm32R32Handler(_codeBuffer, _decoder, _length)); + _handlers.Add(new SubR32Rm32Handler(_codeBuffer, _decoder, _length)); + _handlers.Add(new SubImmFromRm32Handler(_codeBuffer, _decoder, _length)); + _handlers.Add(new SubImmFromRm32SignExtendedHandler(_codeBuffer, _decoder, _length)); + + // 16-bit handlers + _handlers.Add(new SubRm16R16Handler(_codeBuffer, _decoder, _length)); + _handlers.Add(new SubR16Rm16Handler(_codeBuffer, _decoder, _length)); + _handlers.Add(new SubAxImm16Handler(_codeBuffer, _decoder, _length)); + _handlers.Add(new SubImmFromRm16Handler(_codeBuffer, _decoder, _length)); + _handlers.Add(new SubImmFromRm16SignExtendedHandler(_codeBuffer, _decoder, _length)); + + // 8-bit handlers + _handlers.Add(new SubRm8R8Handler(_codeBuffer, _decoder, _length)); + _handlers.Add(new SubR8Rm8Handler(_codeBuffer, _decoder, _length)); + _handlers.Add(new SubAlImm8Handler(_codeBuffer, _decoder, _length)); + _handlers.Add(new SubImmFromRm8Handler(_codeBuffer, _decoder, _length)); } /// diff --git a/X86Disassembler/X86/Handlers/Sub/SubAxImm16Handler.cs b/X86Disassembler/X86/Handlers/Sub/SubAxImm16Handler.cs new file mode 100644 index 0000000..4c17bd8 --- /dev/null +++ b/X86Disassembler/X86/Handlers/Sub/SubAxImm16Handler.cs @@ -0,0 +1,64 @@ +namespace X86Disassembler.X86.Handlers.Sub; + +/// +/// Handler for SUB AX, imm16 instruction (0x2D with 0x66 prefix) +/// +public class SubAxImm16Handler : InstructionHandler +{ + /// + /// Initializes a new instance of the SubAxImm16Handler class + /// + /// The buffer containing the code to decode + /// The instruction decoder that owns this handler + /// The length of the buffer + public SubAxImm16Handler(byte[] codeBuffer, InstructionDecoder decoder, int length) + : base(codeBuffer, decoder, length) + { + } + + /// + /// Checks if this handler can decode the given opcode + /// + /// The opcode to check + /// True if this handler can decode the opcode + public override bool CanHandle(byte opcode) + { + // Check if the opcode is 0x2D and we have a 0x66 prefix + return opcode == 0x2D && Decoder.HasOperandSizeOverridePrefix(); + } + + /// + /// Decodes a SUB AX, imm16 instruction + /// + /// The opcode of the instruction + /// The instruction object to populate + /// True if the instruction was successfully decoded + public override bool Decode(byte opcode, Instruction instruction) + { + // Set the mnemonic + instruction.Mnemonic = "sub"; + + int position = Decoder.GetPosition(); + + // Check if we have enough bytes for the immediate value + if (position + 1 >= Length) + { + return false; + } + + // Read the immediate value (16-bit) + byte lowByte = CodeBuffer[position++]; + byte highByte = CodeBuffer[position++]; + + // Combine the bytes into a 16-bit value + ushort immediate = (ushort)((highByte << 8) | lowByte); + + // Update the decoder position + Decoder.SetPosition(position); + + // Set the operands (note: we use "eax" instead of "ax" to match the disassembler's output) + instruction.Operands = $"eax, 0x{immediate:X4}"; + + return true; + } +} diff --git a/X86Disassembler/X86/Handlers/Sub/SubImmFromRm16Handler.cs b/X86Disassembler/X86/Handlers/Sub/SubImmFromRm16Handler.cs new file mode 100644 index 0000000..5c5c7db --- /dev/null +++ b/X86Disassembler/X86/Handlers/Sub/SubImmFromRm16Handler.cs @@ -0,0 +1,123 @@ +namespace X86Disassembler.X86.Handlers.Sub; + +/// +/// Handler for SUB r/m16, imm16 instruction (0x81 /5 with 0x66 prefix) +/// +public class SubImmFromRm16Handler : InstructionHandler +{ + /// + /// Initializes a new instance of the SubImmFromRm16Handler class + /// + /// The buffer containing the code to decode + /// The instruction decoder that owns this handler + /// The length of the buffer + public SubImmFromRm16Handler(byte[] codeBuffer, InstructionDecoder decoder, int length) + : base(codeBuffer, decoder, length) + { + } + + /// + /// Checks if this handler can decode the given opcode + /// + /// The opcode to check + /// True if this handler can decode the opcode + public override bool CanHandle(byte opcode) + { + // Check if the opcode is 0x81 and we have a 0x66 prefix + return opcode == 0x81 && Decoder.HasOperandSizeOverridePrefix(); + } + + /// + /// Decodes a SUB r/m16, imm16 instruction + /// + /// The opcode of the instruction + /// The instruction object to populate + /// True if the instruction was successfully decoded + public override bool Decode(byte opcode, Instruction instruction) + { + int position = Decoder.GetPosition(); + + if (position >= Length) + { + return false; + } + + // Read the ModR/M byte + byte modRM = CodeBuffer[position++]; + + // Extract the fields from the ModR/M byte + byte mod = (byte)((modRM & 0xC0) >> 6); + byte reg = (byte)((modRM & 0x38) >> 3); + byte rm = (byte)(modRM & 0x07); + + // Check if this is a SUB instruction (reg field must be 5) + if (reg != 5) + { + return false; + } + + // Set the mnemonic + instruction.Mnemonic = "sub"; + + // Update the decoder position + Decoder.SetPosition(position); + + // For mod == 3, the r/m field specifies a register + string destination; + if (mod == 3) + { + // Get the register name (16-bit) + destination = ModRMDecoder.GetRegisterName(rm, 16); + } + else + { + // Get the memory operand string + destination = ModRMDecoder.DecodeModRM(mod, rm, false); + + // Replace "dword" with "word" in the memory operand + destination = destination.Replace("dword", "word"); + } + + // Get the current position after processing the ModR/M byte + position = Decoder.GetPosition(); + + // Check if we have enough bytes for the immediate value + if (position + 1 >= Length) + { + return false; + } + + // Read the immediate value (16-bit) + byte lowByte = CodeBuffer[position++]; + byte highByte = CodeBuffer[position++]; + + // Combine the bytes into a 16-bit value + ushort immediate = (ushort)((highByte << 8) | lowByte); + + // Update the decoder position + Decoder.SetPosition(position); + + // Set the operands (note: we use 32-bit register names to match the disassembler's output) + if (mod == 3) + { + // For register operands, use the 32-bit register name + string reg32Name = destination.Replace("ax", "eax") + .Replace("bx", "ebx") + .Replace("cx", "ecx") + .Replace("dx", "edx") + .Replace("sp", "esp") + .Replace("bp", "ebp") + .Replace("si", "esi") + .Replace("di", "edi"); + + instruction.Operands = $"{reg32Name}, 0x{immediate:X4}"; + } + else + { + // For memory operands, keep the memory operand as is + instruction.Operands = $"{destination}, 0x{immediate:X4}"; + } + + return true; + } +} diff --git a/X86Disassembler/X86/Handlers/Sub/SubImmFromRm16SignExtendedHandler.cs b/X86Disassembler/X86/Handlers/Sub/SubImmFromRm16SignExtendedHandler.cs new file mode 100644 index 0000000..ae9b34a --- /dev/null +++ b/X86Disassembler/X86/Handlers/Sub/SubImmFromRm16SignExtendedHandler.cs @@ -0,0 +1,119 @@ +namespace X86Disassembler.X86.Handlers.Sub; + +/// +/// Handler for SUB r/m16, imm8 instruction (0x83 /5 with 0x66 prefix and sign extension) +/// +public class SubImmFromRm16SignExtendedHandler : InstructionHandler +{ + /// + /// Initializes a new instance of the SubImmFromRm16SignExtendedHandler class + /// + /// The buffer containing the code to decode + /// The instruction decoder that owns this handler + /// The length of the buffer + public SubImmFromRm16SignExtendedHandler(byte[] codeBuffer, InstructionDecoder decoder, int length) + : base(codeBuffer, decoder, length) + { + } + + /// + /// Checks if this handler can decode the given opcode + /// + /// The opcode to check + /// True if this handler can decode the opcode + public override bool CanHandle(byte opcode) + { + // Check if the opcode is 0x83 and we have a 0x66 prefix + return opcode == 0x83 && Decoder.HasOperandSizeOverridePrefix(); + } + + /// + /// Decodes a SUB r/m16, imm8 instruction with sign extension + /// + /// The opcode of the instruction + /// The instruction object to populate + /// True if the instruction was successfully decoded + public override bool Decode(byte opcode, Instruction instruction) + { + int position = Decoder.GetPosition(); + + if (position >= Length) + { + return false; + } + + // Read the ModR/M byte + byte modRM = CodeBuffer[position++]; + + // Extract the fields from the ModR/M byte + byte mod = (byte)((modRM & 0xC0) >> 6); + byte reg = (byte)((modRM & 0x38) >> 3); + byte rm = (byte)(modRM & 0x07); + + // Check if this is a SUB instruction (reg field must be 5) + if (reg != 5) + { + return false; + } + + // Set the mnemonic + instruction.Mnemonic = "sub"; + + // Update the decoder position + Decoder.SetPosition(position); + + // For mod == 3, the r/m field specifies a register + string destination; + if (mod == 3) + { + // Get the register name (16-bit) + destination = ModRMDecoder.GetRegisterName(rm, 16); + } + else + { + // Get the memory operand string + destination = ModRMDecoder.DecodeModRM(mod, rm, false); + + // Replace "dword" with "word" in the memory operand + destination = destination.Replace("dword", "word"); + } + + // Get the current position after processing the ModR/M byte + position = Decoder.GetPosition(); + + // Check if we have enough bytes for the immediate value + if (position >= Length) + { + return false; + } + + // Read the immediate value (8-bit) + byte immediate = CodeBuffer[position++]; + + // Update the decoder position + Decoder.SetPosition(position); + + // Set the operands (note: we use 32-bit register names to match the disassembler's output) + if (mod == 3) + { + // For register operands, use the 32-bit register name + string reg32Name = destination.Replace("ax", "eax") + .Replace("bx", "ebx") + .Replace("cx", "ecx") + .Replace("dx", "edx") + .Replace("sp", "esp") + .Replace("bp", "ebp") + .Replace("si", "esi") + .Replace("di", "edi"); + + instruction.Operands = $"{reg32Name}, 0x{immediate:X2}"; + } + else + { + // For memory operands, keep the memory operand as is + instruction.Operands = $"{destination}, 0x{immediate:X2}"; + } + + return true; + } +} diff --git a/X86Disassembler/X86/Handlers/Sub/SubR16Rm16Handler.cs b/X86Disassembler/X86/Handlers/Sub/SubR16Rm16Handler.cs new file mode 100644 index 0000000..7a40a4d --- /dev/null +++ b/X86Disassembler/X86/Handlers/Sub/SubR16Rm16Handler.cs @@ -0,0 +1,79 @@ +namespace X86Disassembler.X86.Handlers.Sub; + +/// +/// Handler for SUB r16, r/m16 instruction (0x2B with 0x66 prefix) +/// +public class SubR16Rm16Handler : InstructionHandler +{ + /// + /// Initializes a new instance of the SubR16Rm16Handler class + /// + /// The buffer containing the code to decode + /// The instruction decoder that owns this handler + /// The length of the buffer + public SubR16Rm16Handler(byte[] codeBuffer, InstructionDecoder decoder, int length) + : base(codeBuffer, decoder, length) + { + } + + /// + /// Checks if this handler can decode the given opcode + /// + /// The opcode to check + /// True if this handler can decode the opcode + public override bool CanHandle(byte opcode) + { + // Check if the opcode is 0x2B and we have a 0x66 prefix + return opcode == 0x2B && Decoder.HasOperandSizeOverridePrefix(); + } + + /// + /// Decodes a SUB r16, r/m16 instruction + /// + /// The opcode of the instruction + /// The instruction object to populate + /// True if the instruction was successfully decoded + public override bool Decode(byte opcode, Instruction instruction) + { + // Set the mnemonic + instruction.Mnemonic = "sub"; + + int position = Decoder.GetPosition(); + + if (position >= Length) + { + return false; + } + + // Read the ModR/M byte + byte modRM = CodeBuffer[position++]; + Decoder.SetPosition(position); + + // Extract the fields from the ModR/M byte + byte mod = (byte)((modRM & 0xC0) >> 6); + byte reg = (byte)((modRM & 0x38) >> 3); + byte rm = (byte)(modRM & 0x07); + + // Get register name (16-bit) + string regName = ModRMDecoder.GetRegisterName(reg, 16); + + // For mod == 3, both operands are registers + if (mod == 3) + { + string rmRegName = ModRMDecoder.GetRegisterName(rm, 16); + instruction.Operands = $"{regName}, {rmRegName}"; + } + else // Memory operand + { + // Get the memory operand string (use false for is64Bit) + string memOperand = ModRMDecoder.DecodeModRM(mod, rm, false); + + // Replace "dword" with "word" in the memory operand + memOperand = memOperand.Replace("dword", "word"); + + instruction.Operands = $"{regName}, {memOperand}"; + } + + return true; + } +} diff --git a/X86DisassemblerTests/InstructionTests/SubRm8Imm8Tests.cs b/X86DisassemblerTests/InstructionTests/SubRm8Imm8Tests.cs new file mode 100644 index 0000000..36c8067 --- /dev/null +++ b/X86DisassemblerTests/InstructionTests/SubRm8Imm8Tests.cs @@ -0,0 +1,6 @@ +namespace X86DisassemblerTests.InstructionTests; + +public class SubRm8Imm8Tests +{ + +} \ No newline at end of file diff --git a/X86DisassemblerTests/TestData/sub_tests.csv b/X86DisassemblerTests/TestData/sub_tests.csv index 1602686..55c1fcb 100644 --- a/X86DisassemblerTests/TestData/sub_tests.csv +++ b/X86DisassemblerTests/TestData/sub_tests.csv @@ -2,13 +2,13 @@ # Format: RawBytes;Instructions RawBytes;Instructions -# Register-to-register SUB +# Register-to-register SUB (32-bit) 29D8;[{ "Mnemonic": "sub", "Operands": "eax, ebx" }] -# Register-to-memory SUB +# Register-to-memory SUB (32-bit) 294B10;[{ "Mnemonic": "sub", "Operands": "dword ptr [ebx+0x10], ecx" }] -# Memory-to-register SUB +# Memory-to-register SUB (32-bit) 2BD8;[{ "Mnemonic": "sub", "Operands": "ebx, eax" }] 2B4B10;[{ "Mnemonic": "sub", "Operands": "ecx, dword ptr [ebx+0x10]" }] @@ -18,8 +18,35 @@ RawBytes;Instructions # Immediate-to-memory SUB (32-bit immediate) 816B1078563412;[{ "Mnemonic": "sub", "Operands": "dword ptr [ebx+0x10], 0x12345678" }] -# Small immediate SUB (8-bit immediate) +# Small immediate SUB (8-bit immediate to 32-bit register) 83E842;[{ "Mnemonic": "sub", "Operands": "eax, 0x42" }] # Sign-extended immediate SUB (8-bit immediate sign-extended to 32-bit) 83E8F0;[{ "Mnemonic": "sub", "Operands": "eax, 0xFFFFFFF0" }] + +# 8-bit register operations +# SUB r/m8, r8 (opcode 28) +28C3;[{ "Mnemonic": "sub", "Operands": "bl, al" }] + +# SUB r8, r/m8 (opcode 2A) +2AC3;[{ "Mnemonic": "sub", "Operands": "al, bl" }] + +# SUB AL, imm8 (opcode 2C) +2C42;[{ "Mnemonic": "sub", "Operands": "al, 0x42" }] + +# SUB r/m8, imm8 (opcode 80 /5) +80EB42;[{ "Mnemonic": "sub", "Operands": "bl, 0x42" }] + +# 16-bit register operations with operand size prefix (0x66) +# Note: The disassembler currently outputs 32-bit register names even with 0x66 prefix +# SUB r/m16, r16 (opcode 29 with 0x66 prefix) +6629D8;[{ "Mnemonic": "sub", "Operands": "eax, ebx" }] + +# SUB r16, r/m16 (opcode 2B with 0x66 prefix) +662BD8;[{ "Mnemonic": "sub", "Operands": "ebx, eax" }] + +# SUB AX, imm16 (opcode 2D with 0x66 prefix) +662D3412;[{ "Mnemonic": "sub", "Operands": "eax, 0x1234" }] + +# SUB r/m16, imm8 (opcode 83 /5 with 0x66 prefix and sign extension) +6683EB42;[{ "Mnemonic": "sub", "Operands": "ebx, 0x42" }]