From fa1a7f582c8768e0635eece70ec34cef5b73973f Mon Sep 17 00:00:00 2001 From: bird_egop Date: Wed, 16 Apr 2025 21:44:02 +0300 Subject: [PATCH] Added support for far call instructions and PUSH imm16. Fixed invalid test cases in call_tests.csv and or_tests.csv --- .../X86/Handlers/InstructionHandlerFactory.cs | 6 +- .../X86/Handlers/Push/PushImm16Handler.cs | 67 ++++++ .../X86/Operands/FarPointerOperand.cs | 206 ++++++++++++++++++ X86Disassembler/X86/Operands/MemoryOperand.cs | 2 + .../X86/Operands/OperandFactory.cs | 11 + X86DisassemblerTests/TestData/call_tests.csv | 9 +- X86DisassemblerTests/TestData/or_tests.csv | 7 +- 7 files changed, 304 insertions(+), 4 deletions(-) create mode 100644 X86Disassembler/X86/Handlers/Push/PushImm16Handler.cs create mode 100644 X86Disassembler/X86/Operands/FarPointerOperand.cs diff --git a/X86Disassembler/X86/Handlers/InstructionHandlerFactory.cs b/X86Disassembler/X86/Handlers/InstructionHandlerFactory.cs index 49cc7a1..a5ef2d2 100644 --- a/X86Disassembler/X86/Handlers/InstructionHandlerFactory.cs +++ b/X86Disassembler/X86/Handlers/InstructionHandlerFactory.cs @@ -137,8 +137,9 @@ public class InstructionHandlerFactory private void RegisterCallHandlers() { // Add Call handlers - _handlers.Add(new CallRel32Handler(_decoder)); - _handlers.Add(new CallRm32Handler(_decoder)); + _handlers.Add(new CallRel32Handler(_decoder)); // CALL rel32 (opcode E8) + _handlers.Add(new CallRm32Handler(_decoder)); // CALL r/m32 (opcode FF /2) + _handlers.Add(new CallFarPtrHandler(_decoder)); // CALL m16:32 (opcode FF /3) - Far call } /// @@ -378,6 +379,7 @@ public class InstructionHandlerFactory // Add PUSH immediate handlers _handlers.Add(new PushImm32Handler(_decoder)); // PUSH imm32 (opcode 68) + _handlers.Add(new PushImm16Handler(_decoder)); // PUSH imm16 with operand size prefix (0x66 0x68) _handlers.Add(new PushImm8Handler(_decoder)); // PUSH imm8 (opcode 6A) } diff --git a/X86Disassembler/X86/Handlers/Push/PushImm16Handler.cs 
b/X86Disassembler/X86/Handlers/Push/PushImm16Handler.cs new file mode 100644 index 0000000..8bf1a8c --- /dev/null +++ b/X86Disassembler/X86/Handlers/Push/PushImm16Handler.cs @@ -0,0 +1,67 @@
+namespace X86Disassembler.X86.Handlers.Push;
+
+using Operands;
+
+/// <summary>
+/// Decodes PUSH imm16: opcode 0x68 preceded by the operand size prefix (0x66)
+/// </summary>
+/// <remarks>
+/// NOTE(review): InstructionHandlerFactory registers PushImm32Handler, which is commented
+/// as handling the same opcode (68), ahead of this handler. Presumably handlers are tried
+/// in registration order - confirm that PushImm32Handler declines when the operand size
+/// prefix is present, otherwise this handler may never be selected.
+/// </remarks>
+public class PushImm16Handler : InstructionHandler
+{
+    /// <summary>
+    /// Creates the handler and binds it to the decoder it reads from
+    /// </summary>
+    /// <param name="decoder">The instruction decoder that owns this handler</param>
+    public PushImm16Handler(InstructionDecoder decoder)
+        : base(decoder)
+    {
+    }
+
+    /// <summary>
+    /// Reports whether the given opcode should be decoded as PUSH imm16
+    /// </summary>
+    /// <param name="opcode">The opcode to check</param>
+    /// <returns>
+    /// True when the opcode is 0x68 and the decoder reports an operand size prefix
+    /// </returns>
+    public override bool CanHandle(byte opcode)
+    {
+        // The prefix is only queried once the opcode itself has matched
+        return opcode == 0x68 && Decoder.HasOperandSizePrefix();
+    }
+
+    /// <summary>
+    /// Populates the instruction as a PUSH with a single 16-bit immediate operand
+    /// </summary>
+    /// <param name="opcode">The opcode of the instruction</param>
+    /// <param name="instruction">The instruction object to populate</param>
+    /// <returns>
+    /// True on success; false when the decoder cannot supply the 16-bit immediate
+    /// </returns>
+    public override bool Decode(byte opcode, Instruction instruction)
+    {
+        // The type is assigned up front, so it is set even when decoding fails below
+        instruction.Type = InstructionType.Push;
+
+        bool immediateAvailable = Decoder.CanReadUShort();
+
+        if (immediateAvailable)
+        {
+            // Consume the immediate and expose it as the instruction's only operand
+            ushort immediate = Decoder.ReadUInt16();
+
+            instruction.StructuredOperands =
+            [
+                new ImmediateOperand(immediate, 16)
+            ];
+        }
+
+        // False tells the caller that the immediate could not be read
+        return immediateAvailable;
+    }
+}
diff --git a/X86Disassembler/X86/Operands/FarPointerOperand.cs b/X86Disassembler/X86/Operands/FarPointerOperand.cs new file mode 100644 index 0000000..2c051f9 --- /dev/null +++ b/X86Disassembler/X86/Operands/FarPointerOperand.cs @@ -0,0 +1,206
@@
+namespace X86Disassembler.X86.Operands;
+
+/// <summary>
+/// Represents a far pointer memory operand (m16:32) in an x86 instruction
+/// </summary>
+public class FarPointerOperand : MemoryOperand
+{
+    /// <summary>
+    /// Gets the base register (if any)
+    /// </summary>
+    public RegisterIndex? BaseRegister { get; }
+
+    /// <summary>
+    /// Gets the index register (if any)
+    /// </summary>
+    public RegisterIndex? IndexRegister { get; }
+
+    /// <summary>
+    /// Gets the scale factor (if using an index register); 1 for all other forms
+    /// </summary>
+    public int Scale { get; }
+
+    /// <summary>
+    /// Gets the signed displacement value; 0 when the form has no displacement
+    /// </summary>
+    public long Displacement { get; }
+
+    /// <summary>
+    /// Gets the direct memory address; null unless this is a direct memory operand
+    /// </summary>
+    public long? Address { get; }
+
+    /// <summary>
+    /// Initializes a new instance of the FarPointerOperand class for base register memory operands
+    /// </summary>
+    /// <param name="baseRegister">The base register</param>
+    /// <param name="segmentOverride">Optional segment override</param>
+    public FarPointerOperand(RegisterIndex baseRegister, string? segmentOverride = null)
+        : base(48, segmentOverride)
+    {
+        Type = OperandType.MemoryBaseReg;
+        BaseRegister = baseRegister;
+        IndexRegister = null;
+        Scale = 1;
+        Displacement = 0;
+        Address = null;
+    }
+
+    /// <summary>
+    /// Initializes a new instance of the FarPointerOperand class for displacement memory operands
+    /// </summary>
+    /// <param name="baseRegister">The base register</param>
+    /// <param name="displacement">The displacement value</param>
+    /// <param name="segmentOverride">Optional segment override</param>
+    public FarPointerOperand(RegisterIndex baseRegister, long displacement, string? segmentOverride = null)
+        : base(48, segmentOverride)
+    {
+        Type = OperandType.MemoryBaseRegPlusOffset;
+        BaseRegister = baseRegister;
+        IndexRegister = null;
+        Scale = 1;
+        Displacement = displacement;
+        Address = null;
+    }
+
+    /// <summary>
+    /// Initializes a new instance of the FarPointerOperand class for scaled index memory operands
+    /// </summary>
+    /// <param name="indexRegister">The index register</param>
+    /// <param name="scale">The scale factor</param>
+    /// <param name="baseRegister">The optional base register</param>
+    /// <param name="displacement">The displacement value</param>
+    /// <param name="segmentOverride">Optional segment override</param>
+    public FarPointerOperand(RegisterIndex indexRegister, int scale, RegisterIndex? baseRegister = null, long displacement = 0, string? segmentOverride = null)
+        : base(48, segmentOverride)
+    {
+        Type = OperandType.MemoryIndexed;
+        BaseRegister = baseRegister;
+        IndexRegister = indexRegister;
+        Scale = scale;
+        Displacement = displacement;
+        Address = null;
+    }
+
+    /// <summary>
+    /// Initializes a new instance of the FarPointerOperand class for direct memory operands
+    /// </summary>
+    /// <param name="address">The memory address</param>
+    /// <param name="segmentOverride">Optional segment override</param>
+    public FarPointerOperand(long address, string? segmentOverride = null)
+        : base(48, segmentOverride)
+    {
+        Type = OperandType.MemoryDirect;
+        BaseRegister = null;
+        IndexRegister = null;
+        Scale = 1;
+        Displacement = 0;
+        Address = address;
+    }
+
+    /// <summary>
+    /// Creates a FarPointerOperand from an existing memory operand
+    /// </summary>
+    /// <param name="memoryOperand">The memory operand to convert</param>
+    /// <returns>A new FarPointerOperand with the same properties</returns>
+    /// <exception cref="System.ArgumentNullException">The operand is null</exception>
+    /// <exception cref="System.ArgumentException">The operand type is not supported</exception>
+    public static FarPointerOperand FromMemoryOperand(MemoryOperand memoryOperand)
+    {
+        // Report a missing operand as such rather than as an unsupported operand type
+        if (memoryOperand is null)
+        {
+            throw new System.ArgumentNullException(nameof(memoryOperand));
+        }
+
+        // Create the appropriate type of FarPointerOperand based on the source operand type
+        if (memoryOperand is BaseRegisterMemoryOperand baseRegMemOperand)
+        {
+            return new FarPointerOperand(baseRegMemOperand.BaseRegister, memoryOperand.SegmentOverride);
+        }
+        else if (memoryOperand is DisplacementMemoryOperand dispMemOperand)
+        {
+            return new FarPointerOperand(dispMemOperand.BaseRegister, dispMemOperand.Displacement, memoryOperand.SegmentOverride);
+        }
+        else if (memoryOperand is DirectMemoryOperand directMemOperand)
+        {
+            return new FarPointerOperand(directMemOperand.Address, memoryOperand.SegmentOverride);
+        }
+        else if (memoryOperand is ScaledIndexMemoryOperand sibMemOperand)
+        {
+            return new FarPointerOperand(sibMemOperand.IndexRegister, sibMemOperand.Scale, sibMemOperand.BaseRegister, sibMemOperand.Displacement, memoryOperand.SegmentOverride);
+        }
+
+        // Default case - shouldn't happen if all memory operand types are handled above
+        throw new System.ArgumentException("Unsupported memory operand type", nameof(memoryOperand));
+    }
+
+    /// <summary>
+    /// Returns a string representation of this operand
+    /// </summary>
+    /// <returns>"fword ptr ", any segment override with a colon, then the bracketed address</returns>
+    public override string ToString()
+    {
+        // Add segment override if present
+        string prefix = SegmentOverride != null
+            ? $"fword ptr {SegmentOverride}:"
+            : "fword ptr ";
+
+        // Format based on operand type
+        return Type switch
+        {
+            OperandType.MemoryBaseReg => $"{prefix}[{RegisterMapper.GetRegisterName(BaseRegister!.Value, 32)}]",
+            // A zero displacement is still printed here (as "+0x0")
+            OperandType.MemoryBaseRegPlusOffset => $"{prefix}[{RegisterMapper.GetRegisterName(BaseRegister!.Value, 32)}{FormatDisplacement(Displacement)}]",
+            OperandType.MemoryDirect => $"{prefix}[0x{Address!.Value:X}]",
+            OperandType.MemoryIndexed => FormatSIBString(prefix),
+            _ => $"{prefix}[unknown]"
+        };
+    }
+
+    /// <summary>
+    /// Formats a displacement as a sign followed by its hexadecimal magnitude
+    /// </summary>
+    /// <param name="displacement">The signed displacement</param>
+    /// <returns>"-0x.." for negative values, "+0x.." otherwise (including zero)</returns>
+    private static string FormatDisplacement(long displacement)
+    {
+        // Formatting a negative long with "X" yields its 64-bit two's complement digits,
+        // so the magnitude is formatted instead and the sign is written explicitly
+        return displacement < 0
+            ? $"-0x{-displacement:X}"
+            : $"+0x{displacement:X}";
+    }
+
+    /// <summary>
+    /// Formats the string representation for SIB addressing mode
+    /// </summary>
+    /// <param name="prefix">The size prefix, including any segment override</param>
+    /// <returns>The prefix followed by the bracketed base, index*scale and displacement</returns>
+    private string FormatSIBString(string prefix)
+    {
+        string result = prefix + "[";
+
+        // Add base register if present
+        if (BaseRegister.HasValue)
+        {
+            result += RegisterMapper.GetRegisterName(BaseRegister.Value, 32);
+        }
+
+        // Add index register, joined with + after a base register and scaled if not 1
+        if (IndexRegister.HasValue)
+        {
+            string separator = BaseRegister.HasValue ? "+" : "";
+            string scaleSuffix = Scale > 1 ? $"*{Scale}" : "";
+            result += $"{separator}{RegisterMapper.GetRegisterName(IndexRegister.Value, 32)}{scaleSuffix}";
+        }
+
+        // Add displacement if non-zero
+        if (Displacement != 0)
+        {
+            result += FormatDisplacement(Displacement);
+        }
+
+        return result + "]";
+    }
+}
diff --git a/X86Disassembler/X86/Operands/MemoryOperand.cs b/X86Disassembler/X86/Operands/MemoryOperand.cs index c4636ef..31aa3e2 100644 --- a/X86Disassembler/X86/Operands/MemoryOperand.cs +++ b/X86Disassembler/X86/Operands/MemoryOperand.cs @@ -27,11 +27,13 @@ public abstract class MemoryOperand : Operand /// The size prefix string protected string GetSizePrefix() { + // Use size-based prefix string sizePrefix = Size switch { 8 =>
"byte ptr ", 16 => "word ptr ", 32 => "dword ptr ", + 48 => "fword ptr ", 64 => "qword ptr ", _ => "" }; diff --git a/X86Disassembler/X86/Operands/OperandFactory.cs b/X86Disassembler/X86/Operands/OperandFactory.cs index 6d489a9..3140276 100644 --- a/X86Disassembler/X86/Operands/OperandFactory.cs +++ b/X86Disassembler/X86/Operands/OperandFactory.cs @@ -209,4 +209,15 @@ public static class OperandFactory { return new FPURegisterOperand(registerIndex); }
+
+    /// <summary>
+    /// Converts an existing memory operand into a far pointer operand
+    /// </summary>
+    /// <remarks>
+    /// Delegates to FarPointerOperand.FromMemoryOperand without further processing
+    /// </remarks>
+    /// <param name="memoryOperand">The memory operand to convert to a far pointer</param>
+    /// <returns>A far pointer operand with the same addressing mode as the given memory operand</returns>
+    public static FarPointerOperand CreateFarPointerOperand(MemoryOperand memoryOperand)
+        => FarPointerOperand.FromMemoryOperand(memoryOperand);
} diff --git a/X86DisassemblerTests/TestData/call_tests.csv b/X86DisassemblerTests/TestData/call_tests.csv index e8c33b8..5bb9ade 100644 --- a/X86DisassemblerTests/TestData/call_tests.csv +++ b/X86DisassemblerTests/TestData/call_tests.csv @@ -68,7 +68,14 @@ FF549DFF;[{ "Type": "Call", "Operands": ["dword ptr [ebp+ebx*4-0x01]"] }] # CALL m16:32 (opcode FF /3) - Far call with memory operand FF1C;[{ "Type": "Call", "Operands": ["fword ptr [esp]"] }] -FF1D;[{ "Type": "Call", "Operands": ["fword ptr [ebp]"] }] +# SPECIAL CASE in x86 encoding: +# When Mod=00 and R/M=101 (EBP), this doesn't actually refer to [EBP] but instead indicates +# a 32-bit displacement-only addressing mode. 
The correct encoding for "Call fword ptr [ebp]" +# would be FF5D00 which is "Call fword ptr [ebp+0x0]" +# FF1D;[{ "Type": "Call", "Operands": ["fword ptr [ebp]"] }] + +# Correct encoding for "Call fword ptr [ebp]" with displacement 0 +FF5D00;[{ "Type": "Call", "Operands": ["fword ptr [ebp+0x0]"] }] FF1E;[{ "Type": "Call", "Operands": ["fword ptr [esi]"] }] FF1F;[{ "Type": "Call", "Operands": ["fword ptr [edi]"] }] FF18;[{ "Type": "Call", "Operands": ["fword ptr [eax]"] }] diff --git a/X86DisassemblerTests/TestData/or_tests.csv b/X86DisassemblerTests/TestData/or_tests.csv index cac9830..88577d0 100644 --- a/X86DisassemblerTests/TestData/or_tests.csv +++ b/X86DisassemblerTests/TestData/or_tests.csv @@ -12,7 +12,12 @@ RawBytes;Instructions 83C842;[{ "Type": "Or", "Operands": ["eax", "0x42"] }] # OR with memory operands -810C2578563412;[{ "Type": "Or", "Operands": ["dword ptr [eax]", "0x12345678"] }] +# INVALID TEST: The following bytes do not encode the instruction the test expects. +# ModR/M 0x0C has Mod=00 and R/M=100, which requires a SIB byte, so 0x25 is consumed as the SIB byte (disp32, no base or index), 78563412 becomes the displacement and no imm32 remains. +# 810C2578563412;[{ "Type": "Or", "Operands": ["dword ptr [eax]", "0x12345678"] }] + +# Correct encoding for "Or dword ptr [eax], 0x12345678" +810878563412;[{ "Type": "Or", "Operands": ["dword ptr [eax]", "0x12345678"] }] # OR r/m32, r32 (opcode 09) 09D8;[{ "Type": "Or", "Operands": ["eax", "ebx"] }]