From 9445fb225f59667596906685954d1450cc0a7ec7 Mon Sep 17 00:00:00 2001 From: bird_egop Date: Wed, 16 Apr 2025 19:07:32 +0300 Subject: [PATCH] fixes and removed unused code --- X86Disassembler/X86/Constants.cs | 19 ++++ .../X86/Handlers/Add/AddAlImmHandler.cs | 2 +- .../X86/Handlers/Add/AddEaxImmHandler.cs | 4 +- .../X86/Handlers/Add/AddImmToRm32Handler.cs | 2 +- .../X86/Handlers/Add/AddR32Rm32Handler.cs | 2 +- .../X86/Handlers/Add/AddRm32R32Handler.cs | 2 +- .../X86/Handlers/And/AndAlImmHandler.cs | 2 +- .../X86/Handlers/Cmp/CmpImmWithRm8Handler.cs | 21 +---- X86Disassembler/X86/ModRMDecoder.cs | 94 +++---------------- X86Disassembler/X86/Operand.cs | 13 +-- .../X86/Operands/DisplacementMemoryOperand.cs | 9 +- .../X86/Operands/ScaledIndexMemoryOperand.cs | 4 +- .../CmpInstructionSequenceTests.cs | 21 +++-- .../InstructionDecoderTests.cs | 6 +- X86DisassemblerTests/TestData/call_tests.csv | 6 +- 15 files changed, 66 insertions(+), 141 deletions(-) create mode 100644 X86Disassembler/X86/Constants.cs diff --git a/X86Disassembler/X86/Constants.cs b/X86Disassembler/X86/Constants.cs new file mode 100644 index 0000000..c27a639 --- /dev/null +++ b/X86Disassembler/X86/Constants.cs @@ -0,0 +1,19 @@ +namespace X86Disassembler.X86; + +public class Constants +{ + // ModR/M byte masks + public const byte MOD_MASK = 0xC0; // 11000000b + public const byte REG_MASK = 0x38; // 00111000b + public const byte RM_MASK = 0x07; // 00000111b + + // SIB byte masks + public const byte SIB_SCALE_MASK = 0xC0; // 11000000b + public const byte SIB_INDEX_MASK = 0x38; // 00111000b + public const byte SIB_BASE_MASK = 0x07; // 00000111b + + // Register names for different sizes + public static readonly string[] RegisterNames16 = {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"}; + public static readonly string[] RegisterNames32 = {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"}; + +} \ No newline at end of file diff --git a/X86Disassembler/X86/Handlers/Add/AddAlImmHandler.cs b/X86Disassembler/X86/Handlers/Add/AddAlImmHandler.cs index 410422f..632c64e 100644 --- a/X86Disassembler/X86/Handlers/Add/AddAlImmHandler.cs +++ b/X86Disassembler/X86/Handlers/Add/AddAlImmHandler.cs @@ -48,7 +48,7 @@ public class AddAlImmHandler : InstructionHandler byte imm8 = Decoder.ReadByte(); // Create the destination register operand (AL) - var destinationOperand = OperandFactory.CreateRegisterOperand(RegisterIndex.A, 8); + var destinationOperand = OperandFactory.CreateRegisterOperand8(RegisterIndex8.AL); // Create the source immediate operand var sourceOperand = OperandFactory.CreateImmediateOperand(imm8); diff --git a/X86Disassembler/X86/Handlers/Add/AddEaxImmHandler.cs b/X86Disassembler/X86/Handlers/Add/AddEaxImmHandler.cs index 516048d..87c111d 100644 --- a/X86Disassembler/X86/Handlers/Add/AddEaxImmHandler.cs +++ b/X86Disassembler/X86/Handlers/Add/AddEaxImmHandler.cs @@ -46,8 +46,8 @@ public class AddEaxImmHandler : InstructionHandler instruction.StructuredOperands = [ - OperandFactory.CreateRegisterOperand(RegisterIndex.A, 32), - OperandFactory.CreateImmediateOperand(imm32, 32) + OperandFactory.CreateRegisterOperand(RegisterIndex.A), + OperandFactory.CreateImmediateOperand(imm32) ]; return true; diff --git a/X86Disassembler/X86/Handlers/Add/AddImmToRm32Handler.cs b/X86Disassembler/X86/Handlers/Add/AddImmToRm32Handler.cs index 1b3714c..14453d2 100644 --- a/X86Disassembler/X86/Handlers/Add/AddImmToRm32Handler.cs +++ b/X86Disassembler/X86/Handlers/Add/AddImmToRm32Handler.cs @@ -65,7 +65,7 @@ public class AddImmToRm32Handler : InstructionHandler instruction.StructuredOperands = [ destOperand, - OperandFactory.CreateImmediateOperand(imm, 32) + OperandFactory.CreateImmediateOperand(imm) ]; return true; diff --git a/X86Disassembler/X86/Handlers/Add/AddR32Rm32Handler.cs b/X86Disassembler/X86/Handlers/Add/AddR32Rm32Handler.cs index eec5fc4..9b90a31 100644 --- a/X86Disassembler/X86/Handlers/Add/AddR32Rm32Handler.cs +++ b/X86Disassembler/X86/Handlers/Add/AddR32Rm32Handler.cs @@ -50,7 +50,7 @@ public class AddR32Rm32Handler : InstructionHandler var (_, reg, _, sourceOperand) = ModRMDecoder.ReadModRM(); // Create the destination register operand from the reg field - var destinationOperand = OperandFactory.CreateRegisterOperand(reg, 32); + var destinationOperand = OperandFactory.CreateRegisterOperand(reg); // Set the structured operands instruction.StructuredOperands = diff --git a/X86Disassembler/X86/Handlers/Add/AddRm32R32Handler.cs b/X86Disassembler/X86/Handlers/Add/AddRm32R32Handler.cs index 22fea1c..4b3274f 100644 --- a/X86Disassembler/X86/Handlers/Add/AddRm32R32Handler.cs +++ b/X86Disassembler/X86/Handlers/Add/AddRm32R32Handler.cs @@ -50,7 +50,7 @@ public class AddRm32R32Handler : InstructionHandler var (_, reg, _, destinationOperand) = ModRMDecoder.ReadModRM(); // Create the source register operand from the reg field - var sourceOperand = OperandFactory.CreateRegisterOperand(reg, 32); + var sourceOperand = OperandFactory.CreateRegisterOperand(reg); // Set the structured operands instruction.StructuredOperands = diff --git a/X86Disassembler/X86/Handlers/And/AndAlImmHandler.cs b/X86Disassembler/X86/Handlers/And/AndAlImmHandler.cs index f40fa5c..6d7c629 100644 --- a/X86Disassembler/X86/Handlers/And/AndAlImmHandler.cs +++ b/X86Disassembler/X86/Handlers/And/AndAlImmHandler.cs @@ -38,7 +38,7 @@ public class AndAlImmHandler : InstructionHandler instruction.Type = InstructionType.And; // Create the destination register operand (AL) - var destinationOperand = OperandFactory.CreateRegisterOperand(RegisterIndex.A, 8); + var destinationOperand = OperandFactory.CreateRegisterOperand8(RegisterIndex8.AL); // Read immediate value if (!Decoder.CanReadByte()) diff --git a/X86Disassembler/X86/Handlers/Cmp/CmpImmWithRm8Handler.cs b/X86Disassembler/X86/Handlers/Cmp/CmpImmWithRm8Handler.cs index e8805fc..1847244 100644 --- a/X86Disassembler/X86/Handlers/Cmp/CmpImmWithRm8Handler.cs +++ b/X86Disassembler/X86/Handlers/Cmp/CmpImmWithRm8Handler.cs @@ -46,25 +46,8 @@ public class CmpImmWithRm8Handler : InstructionHandler // Set the instruction type instruction.Type = InstructionType.Cmp; - // Read the ModR/M byte - var (mod, _, rm, rawOperand) = ModRMDecoder.ReadModRM8(); - - // For the tests to pass, we need to ensure that when the base register is EBP/BP, - // we create a DisplacementMemoryOperand instead of a BaseRegisterMemoryOperand - Operand destinationOperand; - - // Check if we have a BaseRegisterMemoryOperand with EBP/BP as the base register - if (rawOperand is BaseRegisterMemoryOperand baseMemory && - (baseMemory.BaseRegister == RegisterIndex.Bp)) - { - // Create a DisplacementMemoryOperand with 0 displacement - destinationOperand = OperandFactory.CreateDisplacementMemoryOperand8(baseMemory.BaseRegister, 0); - } - else - { - // Use the operand as is - destinationOperand = rawOperand; - } + // Read the ModR/M byte, specifying that we're dealing with 8-bit operands + var (_, _, _, destinationOperand) = ModRMDecoder.ReadModRM8(); // Note: The operand size is already set to 8-bit by the ReadModRM8 method diff --git a/X86Disassembler/X86/ModRMDecoder.cs b/X86Disassembler/X86/ModRMDecoder.cs index 3f6ee08..9cfc350 100644 --- a/X86Disassembler/X86/ModRMDecoder.cs +++ b/X86Disassembler/X86/ModRMDecoder.cs @@ -7,20 +7,6 @@ using Operands; /// public class ModRMDecoder { - // ModR/M byte masks - private const byte MOD_MASK = 0xC0; // 11000000b - private const byte REG_MASK = 0x38; // 00111000b - private const byte RM_MASK = 0x07; // 00000111b - - // SIB byte masks - private const byte SIB_SCALE_MASK = 0xC0; // 11000000b - private const byte SIB_INDEX_MASK = 0x38; // 00111000b - private const byte SIB_BASE_MASK = 0x07; // 00000111b - - // Register names for different sizes - private static readonly string[] RegisterNames16 = {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"}; - private static readonly string[] RegisterNames32 = {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"}; - // The instruction decoder that owns this ModRM decoder private readonly InstructionDecoder _decoder; @@ -98,17 +84,6 @@ public class ModRMDecoder return DecodeModRMInternal(mod, rmIndex, 8); } - /// - /// Decodes a ModR/M byte to get a 16-bit operand - /// - /// The mod field (2 bits) - /// The r/m field as RegisterIndex - /// The 16-bit operand object - public Operand DecodeModRM16(byte mod, RegisterIndex rmIndex) - { - return DecodeModRMInternal(mod, rmIndex, 16); - } - /// /// Internal implementation for decoding a ModR/M byte to get an operand with specific size /// @@ -249,32 +224,11 @@ public class ModRMDecoder byte modRM = _decoder.PeakByte(); // Extract fields from ModR/M byte - byte regIndex = (byte)((modRM & REG_MASK) >> 3); // Middle 3 bits (bits 3-5) + byte regIndex = (byte)((modRM & Constants.REG_MASK) >> 3); // Middle 3 bits (bits 3-5) return regIndex; } - /// - /// Reads a ModR/M byte and returns the raw field values - /// - /// A tuple containing the raw mod, reg, and rm fields from the ModR/M byte - public (byte mod, byte reg, byte rm) ReadModRMRaw() - { - if (!_decoder.CanReadByte()) - { - return (0, 0, 0); - } - - byte modRM = _decoder.ReadByte(); - - // Extract fields from ModR/M byte - byte mod = (byte)((modRM & MOD_MASK) >> 6); // Top 2 bits (bits 6-7) - byte regIndex = (byte)((modRM & REG_MASK) >> 3); // Middle 3 bits (bits 3-5) - byte rmIndex = (byte)(modRM & RM_MASK); // Bottom 3 bits (bits 0-2) - - return (mod, regIndex, rmIndex); - } - /// /// Reads and decodes a ModR/M byte for standard 32-bit operands /// @@ -356,9 +310,9 @@ public class ModRMDecoder byte modRM = _decoder.ReadByte(); // Extract fields from ModR/M byte - byte mod = (byte)((modRM & MOD_MASK) >> 6); - byte regIndex = (byte)((modRM & REG_MASK) >> 3); - byte rmIndex = (byte)(modRM & RM_MASK); + byte mod = (byte)((modRM & Constants.MOD_MASK) >> 6); + byte regIndex = (byte)((modRM & Constants.REG_MASK) >> 3); + byte rmIndex = (byte)(modRM & Constants.RM_MASK); // Map the ModR/M register indices to RegisterIndex enum values RegisterIndex reg = MapModRMToRegisterIndex(regIndex); @@ -384,9 +338,9 @@ public class ModRMDecoder byte modRM = _decoder.ReadByte(); // Extract fields from ModR/M byte - byte mod = (byte)((modRM & MOD_MASK) >> 6); - byte regIndex = (byte)((modRM & REG_MASK) >> 3); - byte rmIndex = (byte)(modRM & RM_MASK); + byte mod = (byte)((modRM & Constants.MOD_MASK) >> 6); + byte regIndex = (byte)((modRM & Constants.REG_MASK) >> 3); + byte rmIndex = (byte)(modRM & Constants.RM_MASK); // Map the ModR/M register indices to RegisterIndex8 enum values RegisterIndex8 reg = MapModRMToRegisterIndex8(regIndex); @@ -425,9 +379,9 @@ public class ModRMDecoder { // Extract fields from SIB byte - byte scale = (byte)((sib & SIB_SCALE_MASK) >> 6); - int indexIndex = (sib & SIB_INDEX_MASK) >> 3; - int baseIndex = sib & SIB_BASE_MASK; + byte scale = (byte)((sib & Constants.SIB_SCALE_MASK) >> 6); + int indexIndex = (sib & Constants.SIB_INDEX_MASK) >> 3; + int baseIndex = sib & Constants.SIB_BASE_MASK; // Map the SIB register indices to RegisterIndex enum values RegisterIndex index = MapModRMToRegisterIndex(indexIndex); @@ -516,9 +470,9 @@ public class ModRMDecoder { return size switch { - 16 => RegisterNames16[(int)regIndex], - 32 => RegisterNames32[(int)regIndex], - 64 => RegisterNames32[(int)regIndex], // For now, reuse 32-bit names for 64-bit + 16 => Constants.RegisterNames16[(int)regIndex], + 32 => Constants.RegisterNames32[(int)regIndex], + 64 => Constants.RegisterNames32[(int)regIndex], // For now, reuse 32-bit names for 64-bit _ => "unknown" }; } @@ -532,26 +486,4 @@ public class ModRMDecoder { return regIndex8.ToString().ToLower(); } - - /// - /// Maps a RegisterIndex8 enum value to the corresponding RegisterIndex enum value for base registers - /// - /// The RegisterIndex8 enum value - /// The corresponding RegisterIndex enum value - private RegisterIndex MapRegister8ToBaseRegister(RegisterIndex8 regIndex8) - { - // Map 8-bit register indices to their corresponding 32-bit register indices - return regIndex8 switch - { - RegisterIndex8.AL => RegisterIndex.A, - RegisterIndex8.CL => RegisterIndex.C, - RegisterIndex8.DL => RegisterIndex.D, - RegisterIndex8.BL => RegisterIndex.B, - RegisterIndex8.AH => RegisterIndex.A, - RegisterIndex8.CH => RegisterIndex.C, - RegisterIndex8.DH => RegisterIndex.D, - RegisterIndex8.BH => RegisterIndex.B, - _ => RegisterIndex.A // Default to EAX - }; - } } \ No newline at end of file diff --git a/X86Disassembler/X86/Operand.cs b/X86Disassembler/X86/Operand.cs index b2e93f1..098142d 100644 --- a/X86Disassembler/X86/Operand.cs +++ b/X86Disassembler/X86/Operand.cs @@ -11,21 +11,10 @@ public abstract class Operand public OperandType Type { get; protected set; } /// - /// Gets the size of the operand in bits (8, 16, 32) + /// Gets or sets the size of the operand in bits (8, 16, 32) /// public int Size { get; protected set; } - /// - /// Sets the size of the operand in bits - /// - /// The new size in bits (8, 16, 32, or 64) - /// The operand instance for method chaining - public virtual Operand WithSize(int size) - { - Size = size; - return this; - } - /// /// Returns a string representation of this operand /// diff --git a/X86Disassembler/X86/Operands/DisplacementMemoryOperand.cs b/X86Disassembler/X86/Operands/DisplacementMemoryOperand.cs index 8ab7e84..fb99f55 100644 --- a/X86Disassembler/X86/Operands/DisplacementMemoryOperand.cs +++ b/X86Disassembler/X86/Operands/DisplacementMemoryOperand.cs @@ -35,13 +35,10 @@ public class DisplacementMemoryOperand : MemoryOperand /// public override string ToString() { - string sign = Displacement >= 0 ? "+" : ""; + string sign = Displacement >= 0 ? "+" : "-"; var registerName = ModRMDecoder.GetRegisterName(BaseRegister, 32); - - // Format small displacements (< 256) with at least 2 digits - string formattedDisplacement = Math.Abs(Displacement) < 256 - ? $"0x{Math.Abs(Displacement):X2}" - : $"0x{Math.Abs(Displacement):X}"; + + string formattedDisplacement = $"0x{Displacement:X2}"; return $"{GetSizePrefix()}[{registerName}{sign}{formattedDisplacement}]"; } diff --git a/X86Disassembler/X86/Operands/ScaledIndexMemoryOperand.cs b/X86Disassembler/X86/Operands/ScaledIndexMemoryOperand.cs index 0b5f3bd..47dffd1 100644 --- a/X86Disassembler/X86/Operands/ScaledIndexMemoryOperand.cs +++ b/X86Disassembler/X86/Operands/ScaledIndexMemoryOperand.cs @@ -56,8 +56,8 @@ public class ScaledIndexMemoryOperand : MemoryOperand if (Displacement != 0) { - string sign = Displacement > 0 ? "+" : ""; - dispPart = $"{sign}0x{Math.Abs(Displacement):X}"; + string sign = Displacement > 0 ? "+" : "-"; + dispPart = $"{sign}0x{Math.Abs(Displacement):X2}"; } return $"{GetSizePrefix()}[{baseRegPart}{indexPart}{dispPart}]"; diff --git a/X86DisassemblerTests/InstructionTests/CmpInstructionSequenceTests.cs b/X86DisassemblerTests/InstructionTests/CmpInstructionSequenceTests.cs index 8d2e1f6..4090b52 100644 --- a/X86DisassemblerTests/InstructionTests/CmpInstructionSequenceTests.cs +++ b/X86DisassemblerTests/InstructionTests/CmpInstructionSequenceTests.cs @@ -15,7 +15,7 @@ public class CmpInstructionSequenceTests public void CmpImmWithRm8_ComplexMemoryOperand_Correctly() { // Arrange - // CMP BYTE PTR [EBP], 0x03 (80 7D 00 03) + // CMP BYTE PTR [EBP+0x0], 0x03 (80 7D 00 03) byte[] codeBuffer = new byte[] { 0x80, 0x7D, 0x00, 0x03 }; var disassembler = new Disassembler(codeBuffer, 0x1C46); @@ -32,9 +32,10 @@ public class CmpInstructionSequenceTests // Check the first operand (memory operand) var memoryOperand = instruction.StructuredOperands[0]; - Assert.IsType(memoryOperand); - var memory = (BaseRegisterMemoryOperand)memoryOperand; + Assert.IsType(memoryOperand); + var memory = (DisplacementMemoryOperand)memoryOperand; Assert.Equal(RegisterIndex.Bp, memory.BaseRegister); // Base register is ECX + Assert.Equal(0, memory.Displacement); // displacement should be 0x0 Assert.Equal(8, memory.Size); // Memory size is 8 bits (BYTE) // Check the second operand (immediate value) @@ -51,7 +52,7 @@ public class CmpInstructionSequenceTests public void CmpImmWithRm8_FollowedByJge_Correctly() { // Arrange - // CMP BYTE PTR [EBP], 0x03 (80 7D 00 03) + // CMP BYTE PTR [EBP+0x0], 0x03 (80 7D 00 03) // JGE +5 (7D 05) byte[] codeBuffer = new byte[] { 0x80, 0x7D, 0x00, 0x03, 0x7D, 0x05 }; var disassembler = new Disassembler(codeBuffer, 0x1C46); @@ -71,9 +72,10 @@ public class CmpInstructionSequenceTests // Check the first operand (memory operand) var memoryOperand = cmpInstruction.StructuredOperands[0]; - Assert.IsType(memoryOperand); - var memory = (BaseRegisterMemoryOperand)memoryOperand; + Assert.IsType(memoryOperand); + var memory = (DisplacementMemoryOperand)memoryOperand; Assert.Equal(RegisterIndex.Bp, memory.BaseRegister); // Base register is ECX + Assert.Equal(0, memory.Displacement); // displacement should be 0x0 Assert.Equal(8, memory.Size); // Memory size is 8 bits (BYTE) // Check the second operand (immediate value) @@ -106,7 +108,7 @@ public class CmpInstructionSequenceTests public void CmpJgeSequence_DecodesCorrectly() { // Arrange - // CMP BYTE PTR [EBP], 0x03 (80 7D 00 03) + // CMP BYTE PTR [EBP+0x0], 0x03 (80 7D 00 03) // JGE +5 (7D 05) // ADD EBP, 0x18 (83 C5 18) // JMP +3 (EB 03) @@ -132,9 +134,10 @@ public class CmpInstructionSequenceTests // Check the first operand (memory operand) var memoryOperand = cmpInstruction.StructuredOperands[0]; - Assert.IsType(memoryOperand); - var memory = (BaseRegisterMemoryOperand)memoryOperand; + Assert.IsType(memoryOperand); + var memory = (DisplacementMemoryOperand)memoryOperand; Assert.Equal(RegisterIndex.Bp, memory.BaseRegister); // Base register is ECX + Assert.Equal(0, memory.Displacement); // displacement should be is 0x0 Assert.Equal(8, memory.Size); // Memory size is 8 bits (BYTE) // Check the second operand (immediate value) diff --git a/X86DisassemblerTests/InstructionTests/InstructionDecoderTests.cs b/X86DisassemblerTests/InstructionTests/InstructionDecoderTests.cs index 3bf08dc..c488594 100644 --- a/X86DisassemblerTests/InstructionTests/InstructionDecoderTests.cs +++ b/X86DisassemblerTests/InstructionTests/InstructionDecoderTests.cs @@ -73,9 +73,9 @@ public class InstructionDecoderTests // Check the second operand (AL) var alOperand = instruction.StructuredOperands[1]; - Assert.IsType(alOperand); - var alRegisterOperand = (RegisterOperand)alOperand; - Assert.Equal(RegisterIndex.A, alRegisterOperand.Register); + Assert.IsType(alOperand); + var alRegisterOperand = (Register8Operand)alOperand; + Assert.Equal(RegisterIndex8.AL, alRegisterOperand.Register); Assert.Equal(8, alRegisterOperand.Size); // Validate that it's an 8-bit register (AL) } diff --git a/X86DisassemblerTests/TestData/call_tests.csv b/X86DisassemblerTests/TestData/call_tests.csv index fa39d83..6b88048 100644 --- a/X86DisassemblerTests/TestData/call_tests.csv +++ b/X86DisassemblerTests/TestData/call_tests.csv @@ -33,10 +33,12 @@ FF17;[{ "Type": "Call", "Operands": ["dword ptr [edi]"] }] FF1400;[{ "Type": "Call", "Operands": ["dword ptr [eax+eax*1]"] }] FF14C0;[{ "Type": "Call", "Operands": ["dword ptr [eax+eax*8]"] }] FF1444;[{ "Type": "Call", "Operands": ["dword ptr [esp+eax*2]"] }] -FF1485;[{ "Type": "Call", "Operands": ["dword ptr [ebp+eax*4]"] }] +# not recognized neither by ghidra nor online disasms +# FF1485;[{ "Type": "Call", "Operands": ["dword ptr [ebp+eax*4]"] }] FF1498;[{ "Type": "Call", "Operands": ["dword ptr [eax+ebx*4]"] }] FF14D9;[{ "Type": "Call", "Operands": ["dword ptr [ecx+ebx*8]"] }] -FF149D;[{ "Type": "Call", "Operands": ["dword ptr [ebp+ebx*4]"] }] +# not recognized neither by ghidra nor online disasms +# FF149D;[{ "Type": "Call", "Operands": ["dword ptr [ebp+ebx*4]"] }] # CALL m32 (opcode FF /2) with displacement FF5000;[{ "Type": "Call", "Operands": ["dword ptr [eax+0x0]"] }]