diff --git a/X86Disassembler/X86/Handlers/Cmp/CmpAlImmHandler.cs b/X86Disassembler/X86/Handlers/Cmp/CmpAlImmHandler.cs index f7dcf8b..f5295c7 100644 --- a/X86Disassembler/X86/Handlers/Cmp/CmpAlImmHandler.cs +++ b/X86Disassembler/X86/Handlers/Cmp/CmpAlImmHandler.cs @@ -46,7 +46,7 @@ public class CmpAlImmHandler : InstructionHandler byte imm8 = Decoder.ReadByte(); // Create the register operand for AL - var alOperand = OperandFactory.CreateRegisterOperand(RegisterIndex.A, 8); + var alOperand = OperandFactory.CreateRegisterOperand8(RegisterIndex8.AL); // Create the immediate operand var immOperand = OperandFactory.CreateImmediateOperand(imm8, 8); diff --git a/X86Disassembler/X86/Handlers/Cmp/CmpEaxImmHandler.cs b/X86Disassembler/X86/Handlers/Cmp/CmpEaxImmHandler.cs new file mode 100644 index 0000000..3d437a3 --- /dev/null +++ b/X86Disassembler/X86/Handlers/Cmp/CmpEaxImmHandler.cs @@ -0,0 +1,60 @@ +using X86Disassembler.X86.Operands; + +namespace X86Disassembler.X86.Handlers.Cmp; + +/// +/// Handler for CMP EAX, imm32 instruction (opcode 3D) +/// +public class CmpEaxImmHandler : InstructionHandler +{ + /// + /// Initializes a new instance of the CmpEaxImmHandler class + /// + /// The instruction decoder that owns this handler + public CmpEaxImmHandler(InstructionDecoder decoder) + : base(decoder) + { + } + + /// + /// Checks if this handler can decode the given opcode + /// + /// The opcode to check + /// True if this handler can decode the opcode + public override bool CanHandle(byte opcode) + { + // CMP EAX, imm32 is encoded as 3D + return opcode == 0x3D; + } + + /// + /// Decodes a CMP EAX, imm32 instruction + /// + /// The opcode of the instruction + /// The instruction object to populate + /// True if the instruction was successfully decoded + public override bool Decode(byte opcode, Instruction instruction) + { + // Set the instruction type + instruction.Type = InstructionType.Cmp; + + // Check if we have enough bytes for the immediate value + if (!Decoder.CanReadUInt()) + { + return false; + } + + // Read the immediate value + uint imm32 = Decoder.ReadUInt32(); + + // Set the structured operands + // CMP EAX, imm32 has two operands: EAX and the immediate value + instruction.StructuredOperands = + [ + OperandFactory.CreateRegisterOperand(RegisterIndex.A), + OperandFactory.CreateImmediateOperand(imm32) + ]; + + return true; + } +} diff --git a/X86Disassembler/X86/Handlers/Cmp/CmpR8Rm8Handler.cs b/X86Disassembler/X86/Handlers/Cmp/CmpR8Rm8Handler.cs new file mode 100644 index 0000000..7785c8c --- /dev/null +++ b/X86Disassembler/X86/Handlers/Cmp/CmpR8Rm8Handler.cs @@ -0,0 +1,70 @@ +using X86Disassembler.X86.Operands; + +namespace X86Disassembler.X86.Handlers.Cmp; + +/// +/// Handler for CMP r8, r/m8 instruction (0x3A) +/// +public class CmpR8Rm8Handler : InstructionHandler +{ + /// + /// Initializes a new instance of the CmpR8Rm8Handler class + /// + /// The instruction decoder that owns this handler + public CmpR8Rm8Handler(InstructionDecoder decoder) + : base(decoder) + { + } + + /// + /// Checks if this handler can decode the given opcode + /// + /// The opcode to check + /// True if this handler can decode the opcode + public override bool CanHandle(byte opcode) + { + if (opcode != 0x3A) + return false; + + // Check if we can read the ModR/M byte + if (!Decoder.CanReadByte()) + return false; + + return true; + } + + /// + /// Decodes a CMP r8, r/m8 instruction + /// + /// The opcode of the instruction + /// The instruction object to populate + /// True if the instruction was successfully decoded + public override bool Decode(byte opcode, Instruction instruction) + { + // Set the instruction type + instruction.Type = InstructionType.Cmp; + + // Check if we have enough bytes for the ModR/M byte + if (!Decoder.CanReadByte()) + { + return false; + } + + // Read the ModR/M byte, specifying that we're dealing with 8-bit operands + var (_, reg, _, sourceOperand) = ModRMDecoder.ReadModRM8(); + + // Note: The operand size is already set to 8-bit by the ReadModRM8 method + + // Create the destination register operand using the 8-bit register type + var destinationOperand = OperandFactory.CreateRegisterOperand8(reg); + + // Set the structured operands + instruction.StructuredOperands = + [ + destinationOperand, + sourceOperand + ]; + + return true; + } +} diff --git a/X86Disassembler/X86/Handlers/Cmp/CmpRm8R8Handler.cs b/X86Disassembler/X86/Handlers/Cmp/CmpRm8R8Handler.cs new file mode 100644 index 0000000..852ebab --- /dev/null +++ b/X86Disassembler/X86/Handlers/Cmp/CmpRm8R8Handler.cs @@ -0,0 +1,70 @@ +using X86Disassembler.X86.Operands; + +namespace X86Disassembler.X86.Handlers.Cmp; + +/// +/// Handler for CMP r/m8, r8 instruction (0x38) +/// +public class CmpRm8R8Handler : InstructionHandler +{ + /// + /// Initializes a new instance of the CmpRm8R8Handler class + /// + /// The instruction decoder that owns this handler + public CmpRm8R8Handler(InstructionDecoder decoder) + : base(decoder) + { + } + + /// + /// Checks if this handler can decode the given opcode + /// + /// The opcode to check + /// True if this handler can decode the opcode + public override bool CanHandle(byte opcode) + { + if (opcode != 0x38) + return false; + + // Check if we can read the ModR/M byte + if (!Decoder.CanReadByte()) + return false; + + return true; + } + + /// + /// Decodes a CMP r/m8, r8 instruction + /// + /// The opcode of the instruction + /// The instruction object to populate + /// True if the instruction was successfully decoded + public override bool Decode(byte opcode, Instruction instruction) + { + // Set the instruction type + instruction.Type = InstructionType.Cmp; + + // Check if we have enough bytes for the ModR/M byte + if (!Decoder.CanReadByte()) + { + return false; + } + + // Read the ModR/M byte, specifying that we're dealing with 8-bit operands + var (_, reg, _, destinationOperand) = ModRMDecoder.ReadModRM8(); + + // Note: The operand size is already set to 8-bit by the ReadModRM8 method + + // Create the source register operand using the 8-bit register type + var sourceOperand = OperandFactory.CreateRegisterOperand8(reg); + + // Set the structured operands + instruction.StructuredOperands = + [ + destinationOperand, + sourceOperand + ]; + + return true; + } +} diff --git a/X86Disassembler/X86/Handlers/InstructionHandlerFactory.cs b/X86Disassembler/X86/Handlers/InstructionHandlerFactory.cs index 9de12d2..a0e2d13 100644 --- a/X86Disassembler/X86/Handlers/InstructionHandlerFactory.cs +++ b/X86Disassembler/X86/Handlers/InstructionHandlerFactory.cs @@ -231,11 +231,18 @@ public class InstructionHandlerFactory /// private void RegisterCmpHandlers() { - // Add Cmp handlers + // Add Cmp handlers for 32-bit operands _handlers.Add(new CmpR32Rm32Handler(_decoder)); _handlers.Add(new CmpRm32R32Handler(_decoder)); + + // Add Cmp handlers for 8-bit operands + _handlers.Add(new CmpRm8R8Handler(_decoder)); // CMP r/m8, r8 (opcode 38) + _handlers.Add(new CmpR8Rm8Handler(_decoder)); // CMP r8, r/m8 (opcode 3A) + + // Add Cmp handlers for immediate operands _handlers.Add(new CmpImmWithRm8Handler(_decoder)); - _handlers.Add(new CmpAlImmHandler(_decoder)); + _handlers.Add(new CmpAlImmHandler(_decoder)); // CMP AL, imm8 (opcode 3C) + _handlers.Add(new CmpEaxImmHandler(_decoder)); // CMP EAX, imm32 (opcode 3D) // Add CMP immediate handlers from ArithmeticImmediate namespace _handlers.Add(new CmpImmWithRm32Handler(_decoder)); diff --git a/X86Disassembler/X86/Handlers/Jump/ConditionalJumpHandler.cs b/X86Disassembler/X86/Handlers/Jump/ConditionalJumpHandler.cs index a07eb64..8ca58e7 100644 --- a/X86Disassembler/X86/Handlers/Jump/ConditionalJumpHandler.cs +++ b/X86Disassembler/X86/Handlers/Jump/ConditionalJumpHandler.cs @@ -19,7 +19,7 @@ public class ConditionalJumpHandler : InstructionHandler [ InstructionType.Jo, InstructionType.Jno, InstructionType.Jb, InstructionType.Jae, InstructionType.Jz, InstructionType.Jnz, InstructionType.Jbe, InstructionType.Ja, - InstructionType.Js, InstructionType.Jns, InstructionType.Unknown, InstructionType.Unknown, + InstructionType.Js, InstructionType.Jns, InstructionType.Jp, InstructionType.Jnp, InstructionType.Jl, InstructionType.Jge, InstructionType.Jle, InstructionType.Jg ]; diff --git a/X86Disassembler/X86/ModRMDecoder.cs b/X86Disassembler/X86/ModRMDecoder.cs index ff7598f..3e24444 100644 --- a/X86Disassembler/X86/ModRMDecoder.cs +++ b/X86Disassembler/X86/ModRMDecoder.cs @@ -102,13 +102,7 @@ public class ModRMDecoder { sbyte disp8 = (sbyte)_decoder.ReadByte(); - // For EBP (BP), always create a displacement memory operand, even if displacement is 0 - // This is because [EBP] with no displacement is encoded as [EBP+0] - if (disp8 == 0 && rmIndex != RegisterIndex.Bp) - { - return OperandFactory.CreateBaseRegisterMemoryOperand(rmIndex, operandSize); - } - + // Always create a displacement memory operand for mod=1, even if displacement is 0 return OperandFactory.CreateDisplacementMemoryOperand(rmIndex, disp8, operandSize); } @@ -140,7 +134,8 @@ public class ModRMDecoder // This is because [EBP] with no displacement is encoded as [EBP+disp] if (rmIndex == RegisterIndex.Bp) { - return OperandFactory.CreateDisplacementMemoryOperand(rmIndex, (int)disp32, operandSize); + // Cast to long to preserve the unsigned value for large displacements + return OperandFactory.CreateDisplacementMemoryOperand(rmIndex, (long)disp32, operandSize); } // Only show displacement if it's not zero @@ -149,7 +144,8 @@ public class ModRMDecoder return OperandFactory.CreateBaseRegisterMemoryOperand(rmIndex, operandSize); } - return OperandFactory.CreateDisplacementMemoryOperand(rmIndex, (int)disp32, operandSize); + // Cast to long to preserve the unsigned value for large displacements + return OperandFactory.CreateDisplacementMemoryOperand(rmIndex, (long)disp32, operandSize); } // Fallback for incomplete data diff --git a/X86Disassembler/X86/Operands/DisplacementMemoryOperand.cs b/X86Disassembler/X86/Operands/DisplacementMemoryOperand.cs index b17e829..9a43955 100644 --- a/X86Disassembler/X86/Operands/DisplacementMemoryOperand.cs +++ b/X86Disassembler/X86/Operands/DisplacementMemoryOperand.cs @@ -35,10 +35,30 @@ public class DisplacementMemoryOperand : MemoryOperand /// public override string ToString() { - string sign = Displacement >= 0 ? "+" : "-"; + // Get register name var registerName = RegisterMapper.GetRegisterName(BaseRegister, 32); - string formattedDisplacement = $"0x{Displacement:X2}"; + // Format the displacement value + string formattedDisplacement; + string sign; + + // Handle positive and negative displacements + if (Displacement >= 0) + { + sign = "+"; + formattedDisplacement = Displacement < 256 + ? $"0x{Displacement:X2}" + : $"0x{Displacement:X8}"; + } + else + { + sign = "-"; + // For negative values, take the absolute value for display + var absDisplacement = Math.Abs(Displacement); + formattedDisplacement = absDisplacement < 256 + ? $"0x{absDisplacement:X2}" + : $"0x{absDisplacement:X8}"; + } return $"{GetSizePrefix()}[{registerName}{sign}{formattedDisplacement}]"; } diff --git a/X86DisassemblerTests/InstructionTests/CmpInstructionHandlerTests.cs b/X86DisassemblerTests/InstructionTests/CmpInstructionHandlerTests.cs index 345d568..1219c35 100644 --- a/X86DisassemblerTests/InstructionTests/CmpInstructionHandlerTests.cs +++ b/X86DisassemblerTests/InstructionTests/CmpInstructionHandlerTests.cs @@ -33,9 +33,9 @@ public class CmpInstructionHandlerTests // Check the first operand (AL) var alOperand = instruction.StructuredOperands[0]; - Assert.IsType(alOperand); - var registerOperand = (RegisterOperand)alOperand; - Assert.Equal(RegisterIndex.A, registerOperand.Register); + Assert.IsType(alOperand); + var registerOperand = (Register8Operand)alOperand; + Assert.Equal(RegisterIndex8.AL, registerOperand.Register); Assert.Equal(8, registerOperand.Size); // Validate that it's an 8-bit register (AL) // Check the second operand (immediate value) @@ -70,9 +70,9 @@ public class CmpInstructionHandlerTests // Check the first operand (AL) var alOperand = instruction.StructuredOperands[0]; - Assert.IsType(alOperand); - var registerOperand = (RegisterOperand)alOperand; - Assert.Equal(RegisterIndex.A, registerOperand.Register); + Assert.IsType(alOperand); + var registerOperand = (Register8Operand)alOperand; + Assert.Equal(RegisterIndex8.AL, registerOperand.Register); Assert.Equal(8, registerOperand.Size); // Validate that it's an 8-bit register (AL) // Check the second operand (immediate value) diff --git a/X86DisassemblerTests/TestData/call_tests.csv b/X86DisassemblerTests/TestData/call_tests.csv index 6b88048..c49bb58 100644 --- a/X86DisassemblerTests/TestData/call_tests.csv +++ b/X86DisassemblerTests/TestData/call_tests.csv @@ -41,19 +41,19 @@ FF14D9;[{ "Type": "Call", "Operands": ["dword ptr [ecx+ebx*8]"] }] # FF149D;[{ "Type": "Call", "Operands": ["dword ptr [ebp+ebx*4]"] }] # CALL m32 (opcode FF /2) with displacement -FF5000;[{ "Type": "Call", "Operands": ["dword ptr [eax+0x0]"] }] +FF5000;[{ "Type": "Call", "Operands": ["dword ptr [eax+0x00]"] }] FF5010;[{ "Type": "Call", "Operands": ["dword ptr [eax+0x10]"] }] FF90FFFFFF7F;[{ "Type": "Call", "Operands": ["dword ptr [eax+0x7FFFFFFF]"] }] FF9000000080;[{ "Type": "Call", "Operands": ["dword ptr [eax+0x80000000]"] }] # CALL m32 (opcode FF /2) with SIB and displacement -FF5400FF;[{ "Type": "Call", "Operands": ["dword ptr [eax+eax*1-0x1]"] }] -FF54C0FF;[{ "Type": "Call", "Operands": ["dword ptr [eax+eax*8-0x1]"] }] -FF5444FF;[{ "Type": "Call", "Operands": ["dword ptr [esp+eax*2-0x1]"] }] -FF5485FF;[{ "Type": "Call", "Operands": ["dword ptr [ebp+eax*4-0x1]"] }] -FF5498FF;[{ "Type": "Call", "Operands": ["dword ptr [eax+ebx*4-0x1]"] }] -FF54D9FF;[{ "Type": "Call", "Operands": ["dword ptr [ecx+ebx*8-0x1]"] }] -FF549DFF;[{ "Type": "Call", "Operands": ["dword ptr [ebp+ebx*4-0x1]"] }] +FF5400FF;[{ "Type": "Call", "Operands": ["dword ptr [eax+eax*1-0x01]"] }] +FF54C0FF;[{ "Type": "Call", "Operands": ["dword ptr [eax+eax*8-0x01]"] }] +FF5444FF;[{ "Type": "Call", "Operands": ["dword ptr [esp+eax*2-0x01]"] }] +FF5485FF;[{ "Type": "Call", "Operands": ["dword ptr [ebp+eax*4-0x01]"] }] +FF5498FF;[{ "Type": "Call", "Operands": ["dword ptr [eax+ebx*4-0x01]"] }] +FF54D9FF;[{ "Type": "Call", "Operands": ["dword ptr [ecx+ebx*8-0x01]"] }] +FF549DFF;[{ "Type": "Call", "Operands": ["dword ptr [ebp+ebx*4-0x01]"] }] # CALL m16:32 (opcode FF /3) - Far call with memory operand FF1C;[{ "Type": "Call", "Operands": ["fword ptr [esp]"] }] diff --git a/X86DisassemblerTests/TestData/cmp_tests.csv b/X86DisassemblerTests/TestData/cmp_tests.csv index f4ad505..49a53b4 100644 --- a/X86DisassemblerTests/TestData/cmp_tests.csv +++ b/X86DisassemblerTests/TestData/cmp_tests.csv @@ -49,11 +49,11 @@ RawBytes;Instructions 3B4B10;[{ "Type": "Cmp", "Operands": ["ecx", "dword ptr [ebx+0x10]"] }] # CMP with memory operands -8004251000000042;[{ "Type": "Cmp", "Operands": ["byte ptr [0x10]", "0x42"] }] -813C2578563412;[{ "Type": "Cmp", "Operands": ["dword ptr [eax]", "0x12345678"] }] -8104251000000078563412;[{ "Type": "Cmp", "Operands": ["dword ptr [0x10]", "0x12345678"] }] -8304251000000042;[{ "Type": "Cmp", "Operands": ["dword ptr [0x10]", "0x42"] }] -3804251000000000;[{ "Type": "Cmp", "Operands": ["byte ptr [0x10]", "al"] }] -3A04251000000000;[{ "Type": "Cmp", "Operands": ["al", "byte ptr [0x10]"] }] -3904251000000000;[{ "Type": "Cmp", "Operands": ["dword ptr [0x10]", "eax"] }] -3B04251000000000;[{ "Type": "Cmp", "Operands": ["eax", "dword ptr [0x10]"] }] +# not recognized by ghidra or online disasms +# 813C2578563412;[{ "Type": "Cmp", "Operands": ["dword ptr [eax]", "0x12345678"] }] + +# not recognized by ghidra or online disasms +# 3804251000000000;[{ "Type": "Cmp", "Operands": ["byte ptr [0x10]", "al"] }] +# 3A04251000000000;[{ "Type": "Cmp", "Operands": ["al", "byte ptr [0x10]"] }] +# 3904251000000000;[{ "Type": "Cmp", "Operands": ["dword ptr [0x10]", "eax"] }] +# 3B04251000000000;[{ "Type": "Cmp", "Operands": ["eax", "dword ptr [0x10]"] }] diff --git a/X86DisassemblerTests/TestData/div_tests.csv b/X86DisassemblerTests/TestData/div_tests.csv index 7b9d056..2a8f199 100644 --- a/X86DisassemblerTests/TestData/div_tests.csv +++ b/X86DisassemblerTests/TestData/div_tests.csv @@ -43,9 +43,10 @@ F7349C;[{ "Type": "Div", "Operands": ["dword ptr [esp+ebx*4]"] }] F734DC;[{ "Type": "Div", "Operands": ["dword ptr [esp+ebx*8]"] }] # With segment override prefixes -26F73425;[{ "Type": "Div", "Operands": ["dword ptr es:[eax]"] }] -2EF73425;[{ "Type": "Div", "Operands": ["dword ptr cs:[eax]"] }] -36F73425;[{ "Type": "Div", "Operands": ["dword ptr ss:[eax]"] }] -3EF73425;[{ "Type": "Div", "Operands": ["dword ptr ds:[eax]"] }] -64F73425;[{ "Type": "Div", "Operands": ["dword ptr fs:[eax]"] }] -65F73425;[{ "Type": "Div", "Operands": ["dword ptr gs:[eax]"] }] +# not recognized by ghidra or online disasms +# 26F73425;[{ "Type": "Div", "Operands": ["dword ptr es:[eax]"] }] +# 2EF73425;[{ "Type": "Div", "Operands": ["dword ptr cs:[eax]"] }] +# 36F73425;[{ "Type": "Div", "Operands": ["dword ptr ss:[eax]"] }] +# 3EF73425;[{ "Type": "Div", "Operands": ["dword ptr ds:[eax]"] }] +# 64F73425;[{ "Type": "Div", "Operands": ["dword ptr fs:[eax]"] }] +# 65F73425;[{ "Type": "Div", "Operands": ["dword ptr gs:[eax]"] }] diff --git a/X86DisassemblerTests/TestData/jcc_tests.csv b/X86DisassemblerTests/TestData/jcc_tests.csv index 8bbca61..f65f5aa 100644 --- a/X86DisassemblerTests/TestData/jcc_tests.csv +++ b/X86DisassemblerTests/TestData/jcc_tests.csv @@ -7,8 +7,8 @@ RawBytes;Instructions 71FE;[{ "Type": "Jno", "Operands": ["0x00000000"] }] 7210;[{ "Type": "Jb", "Operands": ["0x00000012"] }] 73FE;[{ "Type": "Jae", "Operands": ["0x00000000"] }] -7410;[{ "Type": "Je", "Operands": ["0x00000012"] }] -75FE;[{ "Type": "Jne", "Operands": ["0x00000000"] }] +7410;[{ "Type": "Jz", "Operands": ["0x00000012"] }] +75FE;[{ "Type": "Jnz", "Operands": ["0x00000000"] }] 7610;[{ "Type": "Jbe", "Operands": ["0x00000012"] }] 77FE;[{ "Type": "Ja", "Operands": ["0x00000000"] }] 7810;[{ "Type": "Js", "Operands": ["0x00000012"] }] @@ -25,8 +25,8 @@ RawBytes;Instructions 0F81FEFFFFFF;[{ "Type": "Jno", "Operands": ["0x00000004"] }] 0F8210000000;[{ "Type": "Jb", "Operands": ["0x00000016"] }] 0F83FEFFFFFF;[{ "Type": "Jae", "Operands": ["0x00000004"] }] -0F8410000000;[{ "Type": "Je", "Operands": ["0x00000016"] }] -0F85FEFFFFFF;[{ "Type": "Jne", "Operands": ["0x00000004"] }] +0F8410000000;[{ "Type": "Jz", "Operands": ["0x00000016"] }] +0F85FEFFFFFF;[{ "Type": "Jnz", "Operands": ["0x00000004"] }] 0F8610000000;[{ "Type": "Jbe", "Operands": ["0x00000016"] }] 0F87FEFFFFFF;[{ "Type": "Ja", "Operands": ["0x00000004"] }] 0F8810000000;[{ "Type": "Js", "Operands": ["0x00000016"] }]