diff --git a/X86Disassembler/X86/Handlers/ControlFlowHandler.cs b/X86Disassembler/X86/Handlers/ControlFlowHandler.cs
new file mode 100644
index 0000000..96e5fe6
--- /dev/null
+++ b/X86Disassembler/X86/Handlers/ControlFlowHandler.cs
@@ -0,0 +1,281 @@
+namespace X86Disassembler.X86.Handlers;
+
+/// <summary>
+/// Handler for control flow instructions (RET, CALL, JMP, Jcc, INT, JECXZ)
+/// </summary>
+public class ControlFlowHandler : InstructionHandler
+{
+    // Condition codes for conditional jumps (not referenced in this class; mnemonics come from OpcodeMap)
+    private static readonly string[] ConditionCodes = {
+        "o", "no", "b", "ae", "e", "ne", "be", "a",
+        "s", "ns", "p", "np", "l", "ge", "le", "g"
+    };
+
+    /// <summary>
+    /// Initializes a new instance of the ControlFlowHandler class
+    /// </summary>
+    /// <param name="codeBuffer">The buffer containing the code to decode</param>
+    /// <param name="decoder">The instruction decoder that owns this handler</param>
+    /// <param name="length">The length of the buffer</param>
+    public ControlFlowHandler(byte[] codeBuffer, InstructionDecoder decoder, int length)
+        : base(codeBuffer, decoder, length)
+    {
+    }
+
+    /// <summary>
+    /// Checks if this handler can decode the given opcode
+    /// </summary>
+    /// <param name="opcode">The opcode to check</param>
+    /// <returns>True if this handler can decode the opcode</returns>
+    public override bool CanHandle(byte opcode)
+    {
+        // RET (0xC3) and RET imm16 (0xC2)
+        if (opcode == 0xC3 || opcode == 0xC2)
+        {
+            return true;
+        }
+
+        // CALL rel32
+        if (opcode == 0xE8)
+        {
+            return true;
+        }
+
+        // JMP rel32 (0xE9) and JMP rel8 (0xEB)
+        if (opcode == 0xE9 || opcode == 0xEB)
+        {
+            return true;
+        }
+
+        // Conditional jumps with 8-bit relative offset
+        if (opcode >= 0x70 && opcode <= 0x7F)
+        {
+            return true;
+        }
+
+        // INT3 (0xCC) and INT imm8 (0xCD)
+        if (opcode == 0xCC || opcode == 0xCD)
+        {
+            return true;
+        }
+
+        // JECXZ rel8
+        if (opcode == 0xE3)
+        {
+            return true;
+        }
+
+        return false;
+    }
+
+    /// <summary>
+    /// Decodes a control flow instruction: sets the mnemonic from OpcodeMap and dispatches to the operand decoder
+    /// </summary>
+    /// <param name="opcode">The opcode of the instruction</param>
+    /// <param name="instruction">The instruction object to populate</param>
+    /// <returns>True if the instruction was successfully decoded; false if the opcode is not handled or the buffer is too short</returns>
+    public override bool Decode(byte opcode, Instruction instruction)
+    {
+        // Set the mnemonic based on the opcode
+        instruction.Mnemonic = OpcodeMap.GetMnemonic(opcode);
+
+        // Dispatch on the opcode; operand-less forms are completed inline
+        if (opcode == 0xC3) // RET
+        {
+            // No operands for RET
+            instruction.Operands = string.Empty;
+            return true;
+        }
+        else if (opcode == 0xC2) // RET imm16
+        {
+            return DecodeRETImm16(instruction);
+        }
+        else if (opcode == 0xE8) // CALL rel32
+        {
+            return DecodeCALLRel32(instruction);
+        }
+        else if (opcode == 0xE9) // JMP rel32
+        {
+            return DecodeJMPRel32(instruction);
+        }
+        else if (opcode == 0xEB) // JMP rel8
+        {
+            return DecodeJMPRel8(instruction);
+        }
+        else if (opcode >= 0x70 && opcode <= 0x7F) // Conditional jumps
+        {
+            return DecodeConditionalJump(opcode, instruction);
+        }
+        else if (opcode == 0xCC) // INT3
+        {
+            // No operands for INT3
+            instruction.Operands = string.Empty;
+            return true;
+        }
+        else if (opcode == 0xCD) // INT imm8
+        {
+            return DecodeINTImm8(instruction);
+        }
+        else if (opcode == 0xE3) // JECXZ rel8
+        {
+            return DecodeJECXZRel8(instruction);
+        }
+
+        return false;
+    }
+
+    /// <summary>
+    /// Decodes a RET instruction with 16-bit immediate operand; returns false if fewer than 2 bytes remain
+    /// </summary>
+    private bool DecodeRETImm16(Instruction instruction)
+    {
+        int position = Decoder.GetPosition();
+
+        if (position + 2 > Length)
+        {
+            return false;
+        }
+
+        // Read the immediate value and advance past it
+        ushort imm16 = BitConverter.ToUInt16(CodeBuffer, position);
+        Decoder.SetPosition(position + 2);
+
+        instruction.Operands = $"0x{imm16:X4}";
+        return true;
+    }
+
+    /// <summary>
+    /// Decodes a CALL instruction with 32-bit relative offset; returns false if fewer than 4 bytes remain
+    /// </summary>
+    private bool DecodeCALLRel32(Instruction instruction)
+    {
+        int position = Decoder.GetPosition();
+
+        if (position + 4 > Length)
+        {
+            return false;
+        }
+
+        // Read the relative offset and advance past it
+        int offset = BitConverter.ToInt32(CodeBuffer, position);
+        Decoder.SetPosition(position + 4);
+
+        // rel32 is relative to the next instruction, which starts after the 4 offset bytes (position + 4)
+        uint targetAddress = (uint)(position + 4 + offset);
+
+        instruction.Operands = $"0x{targetAddress:X8}";
+        return true;
+    }
+
+    /// <summary>
+    /// Decodes a JMP instruction with 32-bit relative offset; returns false if fewer than 4 bytes remain
+    /// </summary>
+    private bool DecodeJMPRel32(Instruction instruction)
+    {
+        int position = Decoder.GetPosition();
+
+        if (position + 4 > Length)
+        {
+            return false;
+        }
+
+        // Read the relative offset and advance past it
+        int offset = BitConverter.ToInt32(CodeBuffer, position);
+        Decoder.SetPosition(position + 4);
+
+        // rel32 is relative to the next instruction, which starts after the 4 offset bytes (position + 4)
+        uint targetAddress = (uint)(position + 4 + offset);
+
+        instruction.Operands = $"0x{targetAddress:X8}";
+        return true;
+    }
+
+    /// <summary>
+    /// Decodes a JMP instruction with 8-bit relative offset; returns false if no byte remains
+    /// </summary>
+    private bool DecodeJMPRel8(Instruction instruction)
+    {
+        int position = Decoder.GetPosition();
+
+        if (position >= Length)
+        {
+            return false;
+        }
+
+        // Read the signed relative offset and advance past it
+        sbyte offset = (sbyte)CodeBuffer[position];
+        Decoder.SetPosition(position + 1);
+
+        // Calculate the target address (relative to the next instruction)
+        uint targetAddress = (uint)(position + offset + 1); // +1 because the offset is relative to the next instruction
+
+        instruction.Operands = $"0x{targetAddress:X8}";
+        return true;
+    }
+
+    /// <summary>
+    /// Decodes a conditional jump (0x70-0x7F) with 8-bit relative offset; the opcode argument is not used here
+    /// </summary>
+    private bool DecodeConditionalJump(byte opcode, Instruction instruction)
+    {
+        int position = Decoder.GetPosition();
+
+        if (position >= Length)
+        {
+            return false;
+        }
+
+        // Read the signed relative offset and advance past it
+        sbyte offset = (sbyte)CodeBuffer[position];
+        Decoder.SetPosition(position + 1);
+
+        // Calculate the target address (relative to the next instruction)
+        uint targetAddress = (uint)(position + offset + 1); // +1 because the offset is relative to the next instruction
+
+        instruction.Operands = $"0x{targetAddress:X8}";
+        return true;
+    }
+
+    /// <summary>
+    /// Decodes an INT instruction with 8-bit immediate operand; returns false if no byte remains
+    /// </summary>
+    private bool DecodeINTImm8(Instruction instruction)
+    {
+        int position = Decoder.GetPosition();
+
+        if (position >= Length)
+        {
+            return false;
+        }
+
+        // Read the immediate value and advance past it
+        byte imm8 = CodeBuffer[position];
+        Decoder.SetPosition(position + 1);
+
+        instruction.Operands = $"0x{imm8:X2}";
+        return true;
+    }
+
+    /// <summary>
+    /// Decodes a JECXZ instruction with 8-bit relative offset; returns false if no byte remains
+    /// </summary>
+    private bool DecodeJECXZRel8(Instruction instruction)
+    {
+        int position = Decoder.GetPosition();
+
+        if (position >= Length)
+        {
+            return false;
+        }
+
+        // Read the signed relative offset and advance past it
+        sbyte offset = (sbyte)CodeBuffer[position];
+        Decoder.SetPosition(position + 1);
+
+        // Calculate the target address (relative to the next instruction)
+        uint targetAddress = (uint)(position + offset + 1); // +1 because the offset is relative to the next instruction
+
+        instruction.Operands = $"0x{targetAddress:X8}";
+        return true;
+    }
+}
diff --git a/X86Disassembler/X86/Handlers/DataTransferHandler.cs b/X86Disassembler/X86/Handlers/DataTransferHandler.cs
new file mode 100644
index 0000000..e2226b5
--- /dev/null
+++ b/X86Disassembler/X86/Handlers/DataTransferHandler.cs
@@ -0,0 +1,326 @@
+namespace X86Disassembler.X86.Handlers;
+
+///
+/// Handler for data transfer instructions (MOV, PUSH, POP, etc.)
+///
+public class DataTransferHandler : InstructionHandler
+{
+    /// <summary>
+    /// Initializes a new instance of the DataTransferHandler class
+    /// </summary>
+    /// <param name="codeBuffer">The buffer containing the code to decode</param>
+    /// <param name="decoder">The instruction decoder that owns this handler</param>
+    /// <param name="length">The length of the buffer</param>
+    public DataTransferHandler(byte[] codeBuffer, InstructionDecoder decoder, int length)
+        : base(codeBuffer, decoder, length)
+    {
+    }
+
+    /// <summary>
+    /// Checks if this handler can decode the given opcode
+    /// </summary>
+    /// <param name="opcode">The opcode to check</param>
+    /// <returns>True if this handler can decode the opcode</returns>
+    public override bool CanHandle(byte opcode)
+    {
+        // MOV instructions
+        if ((opcode >= 0x88 && opcode <= 0x8B) || // MOV r/m, r and MOV r, r/m
+            (opcode >= 0xB0 && opcode <= 0xB7) || // MOV r8, imm8
+            (opcode >= 0xB8 && opcode <= 0xBF) || // MOV r32, imm32
+            opcode == 0xA0 || opcode == 0xA1 || // MOV AL/EAX, moffs
+            opcode == 0xA2 || opcode == 0xA3) // MOV moffs, AL/EAX
+        {
+            return true;
+        }
+
+        // PUSH instructions
+        if ((opcode >= 0x50 && opcode <= 0x57) || // PUSH r32
+            opcode == 0x68 || opcode == 0x6A) // PUSH imm32/imm8
+        {
+            return true;
+        }
+
+        // POP instructions
+        if (opcode >= 0x58 && opcode <= 0x5F) // POP r32
+        {
+            return true;
+        }
+
+        // XCHG instructions (the range includes 0x90)
+        if (opcode >= 0x90 && opcode <= 0x97) // XCHG EAX, r32
+        {
+            return true;
+        }
+
+        return false;
+    }
+
+    /// <summary>
+    /// Decodes a data transfer instruction: sets the mnemonic from OpcodeMap and dispatches to the operand decoder
+    /// </summary>
+    /// <param name="opcode">The opcode of the instruction</param>
+    /// <param name="instruction">The instruction object to populate</param>
+    /// <returns>True if the instruction was successfully decoded; false if the opcode is not handled or the buffer is too short</returns>
+    public override bool Decode(byte opcode, Instruction instruction)
+    {
+        // Set the mnemonic based on the opcode
+        instruction.Mnemonic = OpcodeMap.GetMnemonic(opcode);
+
+        // Handle different types of data transfer instructions
+        if (opcode >= 0x88 && opcode <= 0x8B) // MOV r/m, r and MOV r, r/m
+        {
+            return DecodeMOVRegMem(opcode, instruction);
+        }
+        else if (opcode >= 0xB0 && opcode <= 0xB7) // MOV r8, imm8
+        {
+            return DecodeMOVRegImm8(opcode, instruction);
+        }
+        else if (opcode >= 0xB8 && opcode <= 0xBF) // MOV r32, imm32
+        {
+            return DecodeMOVRegImm32(opcode, instruction);
+        }
+        else if (opcode == 0xA0 || opcode == 0xA1) // MOV AL/EAX, moffs
+        {
+            return DecodeMOVAccMem(opcode, instruction);
+        }
+        else if (opcode == 0xA2 || opcode == 0xA3) // MOV moffs, AL/EAX
+        {
+            return DecodeMOVMemAcc(opcode, instruction);
+        }
+        else if (opcode >= 0x50 && opcode <= 0x57) // PUSH r32
+        {
+            return DecodePUSHReg(opcode, instruction);
+        }
+        else if (opcode == 0x68) // PUSH imm32
+        {
+            return DecodePUSHImm32(instruction);
+        }
+        else if (opcode == 0x6A) // PUSH imm8
+        {
+            return DecodePUSHImm8(instruction);
+        }
+        else if (opcode >= 0x58 && opcode <= 0x5F) // POP r32
+        {
+            return DecodePOPReg(opcode, instruction);
+        }
+        else if (opcode >= 0x90 && opcode <= 0x97) // XCHG EAX, r32
+        {
+            return DecodeXCHGEAXReg(opcode, instruction);
+        }
+
+        return false;
+    }
+
+    /// <summary>
+    /// Decodes MOV r/m, r (0x88/0x89) and MOV r, r/m (0x8A/0x8B); operands are written destination first
+    /// </summary>
+    private bool DecodeMOVRegMem(byte opcode, Instruction instruction)
+    {
+        int position = Decoder.GetPosition();
+
+        if (position >= Length)
+        {
+            return false;
+        }
+
+        // Read the ModR/M byte
+        var (mod, reg, rm, memOperand) = ModRMDecoder.ReadModRM();
+
+        // Direction bit (bit 1): clear = reg field is the source (MOV r/m, r), set = reg field is the destination (MOV r, r/m)
+        bool regIsDestination = (opcode & 0x02) != 0;
+
+        // Determine operand size (0 = 8-bit, 1 = 32-bit)
+        bool operandSize32 = (opcode & 0x01) != 0;
+
+        // Get register name based on size
+        string regName = ModRMDecoder.GetRegisterName(reg, operandSize32 ? 32 : 8);
+
+        // For mod == 3, both operands are registers
+        if (mod == 3)
+        {
+            string rmRegName = ModRMDecoder.GetRegisterName(rm, operandSize32 ? 32 : 8);
+            instruction.Operands = regIsDestination ? $"{regName}, {rmRegName}" : $"{rmRegName}, {regName}";
+        }
+        else // Memory operand
+        {
+            instruction.Operands = regIsDestination ? $"{regName}, {memOperand}" : $"{memOperand}, {regName}";
+        }
+
+        return true;
+    }
+
+    /// <summary>
+    /// Decodes a MOV instruction with 8-bit register and immediate operand; returns false if no byte remains
+    /// </summary>
+    private bool DecodeMOVRegImm8(byte opcode, Instruction instruction)
+    {
+        int position = Decoder.GetPosition();
+
+        if (position >= Length)
+        {
+            return false;
+        }
+
+        // Register is encoded in the low 3 bits of the opcode
+        int reg = opcode & 0x07;
+        string regName = ModRMDecoder.GetRegisterName(reg, 8);
+
+        // Read the immediate value and advance past it
+        byte imm8 = CodeBuffer[position];
+        Decoder.SetPosition(position + 1);
+
+        instruction.Operands = $"{regName}, 0x{imm8:X2}";
+        return true;
+    }
+
+    /// <summary>
+    /// Decodes a MOV instruction with 32-bit register and immediate operand; returns false if fewer than 4 bytes remain
+    /// </summary>
+    private bool DecodeMOVRegImm32(byte opcode, Instruction instruction)
+    {
+        int position = Decoder.GetPosition();
+
+        if (position + 4 > Length)
+        {
+            return false;
+        }
+
+        // Register is encoded in the low 3 bits of the opcode
+        int reg = opcode & 0x07;
+        string regName = ModRMDecoder.GetRegisterName(reg, 32);
+
+        // Read the immediate value and advance past it
+        uint imm32 = BitConverter.ToUInt32(CodeBuffer, position);
+        Decoder.SetPosition(position + 4);
+
+        instruction.Operands = $"{regName}, 0x{imm32:X8}";
+        return true;
+    }
+
+    /// <summary>
+    /// Decodes a MOV instruction with accumulator (AL/EAX) destination and a 32-bit memory offset source
+    /// </summary>
+    private bool DecodeMOVAccMem(byte opcode, Instruction instruction)
+    {
+        int position = Decoder.GetPosition();
+
+        if (position + 4 > Length)
+        {
+            return false;
+        }
+
+        // Determine operand size (0xA0 = 8-bit, 0xA1 = 32-bit)
+        bool operandSize32 = opcode == 0xA1;
+        string regName = operandSize32 ? "eax" : "al";
+
+        // Read the memory offset and advance past it
+        uint offset = BitConverter.ToUInt32(CodeBuffer, position);
+        Decoder.SetPosition(position + 4);
+
+        instruction.Operands = $"{regName}, [0x{offset:X8}]";
+        return true;
+    }
+
+    /// <summary>
+    /// Decodes a MOV instruction with a 32-bit memory offset destination and accumulator (AL/EAX) source
+    /// </summary>
+    private bool DecodeMOVMemAcc(byte opcode, Instruction instruction)
+    {
+        int position = Decoder.GetPosition();
+
+        if (position + 4 > Length)
+        {
+            return false;
+        }
+
+        // Determine operand size (0xA2 = 8-bit, 0xA3 = 32-bit)
+        bool operandSize32 = opcode == 0xA3;
+        string regName = operandSize32 ? "eax" : "al";
+
+        // Read the memory offset and advance past it
+        uint offset = BitConverter.ToUInt32(CodeBuffer, position);
+        Decoder.SetPosition(position + 4);
+
+        instruction.Operands = $"[0x{offset:X8}], {regName}";
+        return true;
+    }
+
+    /// <summary>
+    /// Decodes a PUSH instruction with register operand (no further bytes are consumed)
+    /// </summary>
+    private bool DecodePUSHReg(byte opcode, Instruction instruction)
+    {
+        // Register is encoded in the low 3 bits of the opcode
+        int reg = opcode & 0x07;
+        string regName = ModRMDecoder.GetRegisterName(reg, 32);
+
+        instruction.Operands = regName;
+        return true;
+    }
+
+    /// <summary>
+    /// Decodes a PUSH instruction with 32-bit immediate operand; returns false if fewer than 4 bytes remain
+    /// </summary>
+    private bool DecodePUSHImm32(Instruction instruction)
+    {
+        int position = Decoder.GetPosition();
+
+        if (position + 4 > Length)
+        {
+            return false;
+        }
+
+        // Read the immediate value and advance past it
+        uint imm32 = BitConverter.ToUInt32(CodeBuffer, position);
+        Decoder.SetPosition(position + 4);
+
+        instruction.Operands = $"0x{imm32:X8}";
+        return true;
+    }
+
+    /// <summary>
+    /// Decodes a PUSH instruction with 8-bit immediate operand; returns false if no byte remains
+    /// </summary>
+    private bool DecodePUSHImm8(Instruction instruction)
+    {
+        int position = Decoder.GetPosition();
+
+        if (position >= Length)
+        {
+            return false;
+        }
+
+        // Read the immediate value and advance past it
+        byte imm8 = CodeBuffer[position];
+        Decoder.SetPosition(position + 1);
+
+        instruction.Operands = $"0x{imm8:X2}";
+        return true;
+    }
+
+    /// <summary>
+    /// Decodes a POP instruction with register operand (no further bytes are consumed)
+    /// </summary>
+    private bool DecodePOPReg(byte opcode, Instruction instruction)
+    {
+        // Register is encoded in the low 3 bits of the opcode
+        int reg = opcode & 0x07;
+        string regName = ModRMDecoder.GetRegisterName(reg, 32);
+
+        instruction.Operands = regName;
+        return true;
+    }
+
+    /// <summary>
+    /// Decodes an XCHG instruction with EAX and register operands
+    /// </summary>
+    private bool DecodeXCHGEAXReg(byte opcode, Instruction instruction)
+    {
+        // NOTE(review): opcode 0x90 (NOP) also reaches this path and is rendered with operands "eax, eax" - confirm intended
+        int reg = opcode & 0x07;
+        string regName = ModRMDecoder.GetRegisterName(reg, 32);
+
+        instruction.Operands = $"eax, {regName}";
+        return true;
+    }
+}
diff --git a/X86Disassembler/X86/Handlers/FloatingPointHandler.cs b/X86Disassembler/X86/Handlers/FloatingPointHandler.cs
new file mode 100644
index 0000000..9902f48
--- /dev/null
+++ b/X86Disassembler/X86/Handlers/FloatingPointHandler.cs
@@ -0,0 +1,167 @@
+namespace X86Disassembler.X86.Handlers;
+
+/// <summary>
+/// Handler for floating-point instructions (D8-DF opcodes)
+/// </summary>
+public class FloatingPointHandler : InstructionHandler
+{
+    // Mnemonic table indexed by [opcode - 0xD8][ModR/M reg field]; unset entries stay "??"
+    private static readonly string[][] FpuMnemonics = new string[8][];
+
+    /// <summary>
+    /// Static constructor to initialize the FPU mnemonic tables
+    /// </summary>
+    static FloatingPointHandler()
+    {
+        InitializeFpuMnemonics();
+    }
+
+    /// <summary>
+    /// Initializes the FPU mnemonic tables
+    /// </summary>
+    private static void InitializeFpuMnemonics()
+    {
+        // Initialize all tables to the unknown marker
+        for (int i = 0; i < 8; i++)
+        {
+            FpuMnemonics[i] = new string[8];
+            for (int j = 0; j < 8; j++)
+            {
+                FpuMnemonics[i][j] = "??";
+            }
+        }
+
+        // D8 opcode - operations on float32
+        FpuMnemonics[0][0] = "fadd";
+        FpuMnemonics[0][1] = "fmul";
+        FpuMnemonics[0][2] = "fcom";
+        FpuMnemonics[0][3] = "fcomp";
+        FpuMnemonics[0][4] = "fsub";
+        FpuMnemonics[0][5] = "fsubr";
+        FpuMnemonics[0][6] = "fdiv";
+        FpuMnemonics[0][7] = "fdivr";
+
+        // D9 opcode - load, store, and control operations
+        FpuMnemonics[1][0] = "fld";
+        FpuMnemonics[1][2] = "fst";
+        FpuMnemonics[1][3] = "fstp";
+        FpuMnemonics[1][4] = "fldenv";
+        FpuMnemonics[1][5] = "fldcw";
+        FpuMnemonics[1][6] = "fnstenv";
+        FpuMnemonics[1][7] = "fnstcw";
+
+        // DA opcode - operations on int32
+        FpuMnemonics[2][0] = "fiadd";
+        FpuMnemonics[2][1] = "fimul";
+        FpuMnemonics[2][2] = "ficom";
+        FpuMnemonics[2][3] = "ficomp";
+        FpuMnemonics[2][4] = "fisub";
+        FpuMnemonics[2][5] = "fisubr";
+        FpuMnemonics[2][6] = "fidiv";
+        FpuMnemonics[2][7] = "fidivr";
+
+        // DB opcode - load/store int32, misc
+        FpuMnemonics[3][0] = "fild";
+        FpuMnemonics[3][2] = "fist";
+        FpuMnemonics[3][3] = "fistp";
+        FpuMnemonics[3][5] = "fld";
+        FpuMnemonics[3][7] = "fstp";
+
+        // DC opcode - operations on float64
+        FpuMnemonics[4][0] = "fadd";
+        FpuMnemonics[4][1] = "fmul";
+        FpuMnemonics[4][2] = "fcom";
+        FpuMnemonics[4][3] = "fcomp";
+        FpuMnemonics[4][4] = "fsub";
+        FpuMnemonics[4][5] = "fsubr";
+        FpuMnemonics[4][6] = "fdiv";
+        FpuMnemonics[4][7] = "fdivr";
+
+        // DD opcode - load/store float64
+        FpuMnemonics[5][0] = "fld";
+        FpuMnemonics[5][2] = "fst";
+        FpuMnemonics[5][3] = "fstp";
+        FpuMnemonics[5][4] = "frstor";
+        FpuMnemonics[5][6] = "fnsave";
+        FpuMnemonics[5][7] = "fnstsw";
+
+        // DE opcode - operations on int16
+        FpuMnemonics[6][0] = "fiadd";
+        FpuMnemonics[6][1] = "fimul";
+        FpuMnemonics[6][2] = "ficom";
+        FpuMnemonics[6][3] = "ficomp";
+        FpuMnemonics[6][4] = "fisub";
+        FpuMnemonics[6][5] = "fisubr";
+        FpuMnemonics[6][6] = "fidiv";
+        FpuMnemonics[6][7] = "fidivr";
+
+        // DF opcode - load/store int16, misc
+        FpuMnemonics[7][0] = "fild";
+        FpuMnemonics[7][2] = "fist";
+        FpuMnemonics[7][3] = "fistp";
+        FpuMnemonics[7][4] = "fbld";
+        FpuMnemonics[7][5] = "fild";
+        FpuMnemonics[7][6] = "fbstp";
+        FpuMnemonics[7][7] = "fistp";
+    }
+
+    /// <summary>
+    /// Initializes a new instance of the FloatingPointHandler class
+    /// </summary>
+    /// <param name="codeBuffer">The buffer containing the code to decode</param>
+    /// <param name="decoder">The instruction decoder that owns this handler</param>
+    /// <param name="length">The length of the buffer</param>
+    public FloatingPointHandler(byte[] codeBuffer, InstructionDecoder decoder, int length)
+        : base(codeBuffer, decoder, length)
+    {
+    }
+
+    /// <summary>
+    /// Checks if this handler can decode the given opcode
+    /// </summary>
+    /// <param name="opcode">The opcode to check</param>
+    /// <returns>True if this handler can decode the opcode</returns>
+    public override bool CanHandle(byte opcode)
+    {
+        return opcode >= 0xD8 && opcode <= 0xDF;
+    }
+
+    /// <summary>
+    /// Decodes a floating-point instruction: the mnemonic is looked up by opcode and ModR/M reg field
+    /// </summary>
+    /// <param name="opcode">The opcode of the instruction</param>
+    /// <param name="instruction">The instruction object to populate</param>
+    /// <returns>True if the instruction was successfully decoded; false if no ModR/M byte remains</returns>
+    public override bool Decode(byte opcode, Instruction instruction)
+    {
+        int position = Decoder.GetPosition();
+
+        if (position >= Length)
+        {
+            return false;
+        }
+
+        // The opcode index in our tables (0-7 for D8-DF)
+        int opcodeIndex = opcode - 0xD8;
+
+        // Read the ModR/M byte
+        var (mod, reg, rm, operand) = ModRMDecoder.ReadModRM(opcodeIndex == 7); // DF uses 64-bit operands
+
+        // Set the mnemonic based on the opcode and reg field
+        instruction.Mnemonic = FpuMnemonics[opcodeIndex][reg];
+
+        // For memory operands, set the operand
+        if (mod != 3) // Memory operand
+        {
+            instruction.Operands = operand;
+        }
+        else // Register operand (ST(i))
+        {
+            // For register operands, we need to handle the stack registers
+            // This is a simplified implementation: the memory-form mnemonic table is reused for register forms
+            instruction.Operands = $"st({rm})";
+        }
+
+        return true;
+    }
+}
diff --git a/X86Disassembler/X86/Handlers/Group1Handler.cs b/X86Disassembler/X86/Handlers/Group1Handler.cs
new file mode 100644
index 0000000..80a6460
--- /dev/null
+++ b/X86Disassembler/X86/Handlers/Group1Handler.cs
@@ -0,0 +1,104 @@
+namespace X86Disassembler.X86.Handlers;
+
+/// <summary>
+/// Handler for Group 1 instructions (ADD, OR, ADC, SBB, AND, SUB, XOR, CMP)
+/// </summary>
+public class Group1Handler : InstructionHandler
+{
+    /// <summary>
+    /// Initializes a new instance of the Group1Handler class
+    /// </summary>
+    /// <param name="codeBuffer">The buffer containing the code to decode</param>
+    /// <param name="decoder">The instruction decoder that owns this handler</param>
+    /// <param name="length">The length of the buffer</param>
+    public Group1Handler(byte[] codeBuffer, InstructionDecoder decoder, int length)
+        : base(codeBuffer, decoder, length)
+    {
+    }
+
+    /// <summary>
+    /// Checks if this handler can decode the given opcode
+    /// </summary>
+    /// <param name="opcode">The opcode to check</param>
+    /// <returns>True if this handler can decode the opcode</returns>
+    public override bool CanHandle(byte opcode)
+    {
+        return opcode == 0x80 || opcode == 0x81 || opcode == 0x83;
+    }
+
+    /// <summary>
+    /// Decodes a Group 1 instruction: the ModR/M reg field selects the operation, followed by an immediate
+    /// </summary>
+    /// <param name="opcode">The opcode of the instruction</param>
+    /// <param name="instruction">The instruction object to populate</param>
+    /// <returns>True if decoded (a truncated immediate is rendered as "???"); false if no ModR/M byte remains</returns>
+    public override bool Decode(byte opcode, Instruction instruction)
+    {
+        int position = Decoder.GetPosition();
+
+        if (position >= Length)
+        {
+            return false;
+        }
+
+        // Read the ModR/M byte
+        var (mod, reg, rm, destOperand) = ModRMDecoder.ReadModRM();
+
+        // Determine the operation based on reg field
+        instruction.Mnemonic = OpcodeMap.Group1Operations[reg];
+
+        // Read the immediate value based on opcode; re-read the position because ReadModRM consumed bytes
+        string immOperand;
+        position = Decoder.GetPosition();
+
+        switch (opcode)
+        {
+            case 0x80: // 8-bit immediate
+                if (position < Length)
+                {
+                    byte imm8 = CodeBuffer[position];
+                    Decoder.SetPosition(position + 1);
+                    immOperand = $"0x{imm8:X2}";
+                }
+                else
+                {
+                    immOperand = "???";
+                }
+                break;
+
+            case 0x81: // 32-bit immediate
+                if (position + 4 <= Length)
+                {
+                    uint imm32 = BitConverter.ToUInt32(CodeBuffer, position);
+                    Decoder.SetPosition(position + 4);
+                    immOperand = $"0x{imm32:X8}";
+                }
+                else
+                {
+                    immOperand = "???";
+                }
+                break;
+
+            case 0x83: // 8-bit sign-extended immediate
+                if (position < Length)
+                {
+                    sbyte imm8 = (sbyte)CodeBuffer[position];
+                    Decoder.SetPosition(position + 1);
+                    immOperand = $"0x{imm8:X2}";
+                }
+                else
+                {
+                    immOperand = "???";
+                }
+                break;
+
+            default:
+                return false;
+        }
+
+        // Set the operands (destination first)
+        instruction.Operands = $"{destOperand}, {immOperand}";
+
+        return true;
+    }
+}
diff
--git a/X86Disassembler/X86/Handlers/InstructionHandler.cs b/X86Disassembler/X86/Handlers/InstructionHandler.cs
new file mode 100644
index 0000000..20604c3
--- /dev/null
+++ b/X86Disassembler/X86/Handlers/InstructionHandler.cs
@@ -0,0 +1,48 @@
+namespace X86Disassembler.X86.Handlers;
+
+/// <summary>
+/// Common base for opcode handlers: holds the shared decoding state and defines the CanHandle/Decode contract
+/// </summary>
+public abstract class InstructionHandler
+{
+    /// <summary>ModRM decoder for handling addressing modes</summary>
+    protected readonly ModRMDecoder ModRMDecoder;
+
+    /// <summary>The instruction decoder that owns this handler</summary>
+    protected readonly InstructionDecoder Decoder;
+
+    /// <summary>Buffer containing the code to decode</summary>
+    protected readonly byte[] CodeBuffer;
+
+    /// <summary>Length of the buffer</summary>
+    protected readonly int Length;
+
+    /// <summary>
+    /// Stores the shared decoding state and creates the ModRM decoder over the same buffer, decoder and length
+    /// </summary>
+    /// <param name="codeBuffer">The buffer containing the code to decode</param>
+    /// <param name="decoder">The instruction decoder that owns this handler</param>
+    /// <param name="length">The length of the buffer</param>
+    protected InstructionHandler(byte[] codeBuffer, InstructionDecoder decoder, int length)
+    {
+        this.ModRMDecoder = new ModRMDecoder(codeBuffer, decoder, length);
+        this.Decoder = decoder;
+        this.CodeBuffer = codeBuffer;
+        this.Length = length;
+    }
+
+    /// <summary>
+    /// Reports whether this handler is responsible for the given opcode
+    /// </summary>
+    /// <param name="opcode">The opcode to check</param>
+    /// <returns>True if this handler can decode the opcode</returns>
+    public abstract bool CanHandle(byte opcode);
+
+    /// <summary>
+    /// Decodes the instruction identified by the opcode into the supplied instruction object
+    /// </summary>
+    /// <param name="opcode">The opcode of the instruction</param>
+    /// <param name="instruction">The instruction object to populate</param>
+    /// <returns>True if the instruction was successfully decoded</returns>
+    public abstract bool Decode(byte opcode, Instruction instruction);
+}
diff --git a/X86Disassembler/X86/InstructionDecoder.cs b/X86Disassembler/X86/InstructionDecoder.cs
index dadd195..78f40b9 100644
--- a/X86Disassembler/X86/InstructionDecoder.cs
+++ b/X86Disassembler/X86/InstructionDecoder.cs
@@ -1,11 +1,13 @@
 namespace X86Disassembler.X86;
 
+using X86Disassembler.X86.Handlers;
+
 /// <summary>
-/// Decoder for x86 instructions
+/// Decodes x86 instructions
 /// </summary>
public class InstructionDecoder { - // Instruction prefixes + // Instruction prefix bytes private const byte PREFIX_LOCK = 0xF0; private const byte PREFIX_REPNE = 0xF2; private const byte PREFIX_REP = 0xF3; @@ -18,44 +20,6 @@ public class InstructionDecoder private const byte PREFIX_OPERAND_SIZE = 0x66; private const byte PREFIX_ADDRESS_SIZE = 0x67; - // Common opcodes - private const byte OPCODE_INT3 = 0xCC; - private const byte OPCODE_NOP = 0x90; - private const byte OPCODE_RET = 0xC3; - private const byte OPCODE_CALL_NEAR_RELATIVE = 0xE8; - private const byte OPCODE_JMP_NEAR_RELATIVE = 0xE9; - private const byte OPCODE_JMP_SHORT_RELATIVE = 0xEB; - - // Opcode groups - private const byte OPCODE_GROUP_1_BYTE = 0x80; - private const byte OPCODE_GROUP_1_WORD_DWORD = 0x81; - private const byte OPCODE_GROUP_1_BYTE_IMM8 = 0x83; - - // ModR/M byte masks - private const byte MODRM_MOD_MASK = 0xC0; // 11000000b - private const byte MODRM_REG_MASK = 0x38; // 00111000b - private const byte MODRM_RM_MASK = 0x07; // 00000111b - - // SIB byte masks - private const byte SIB_SCALE_MASK = 0xC0; // 11000000b - private const byte SIB_INDEX_MASK = 0x38; // 00111000b - private const byte SIB_BASE_MASK = 0x07; // 00000111b - - // Register names - private static readonly string[] RegisterNames8 = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh" }; - private static readonly string[] RegisterNames16 = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di" }; - private static readonly string[] RegisterNames32 = { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi" }; - private static readonly string[] SegmentRegisterNames = { "es", "cs", "ss", "ds", "fs", "gs" }; - - // Condition codes for conditional jumps - private static readonly string[] ConditionCodes = { - "o", "no", "b", "ae", "e", "ne", "be", "a", - "s", "ns", "p", "np", "l", "ge", "le", "g" - }; - - // One-byte opcode map - private static readonly string[] OneByteOpcodes = new string[256]; - // Buffer containing the code to decode 
private readonly byte[] _codeBuffer; @@ -65,178 +29,8 @@ public class InstructionDecoder // Length of the buffer private readonly int _length; - /// - /// Static constructor to initialize the opcode maps - /// - static InstructionDecoder() - { - InitializeOpcodeMaps(); - } - - /// - /// Initializes the opcode maps - /// - private static void InitializeOpcodeMaps() - { - // Initialize all entries to "??" (unknown) - for (int i = 0; i < 256; i++) - { - OneByteOpcodes[i] = "??"; - } - - // Floating-point instructions - OneByteOpcodes[0xD8] = "fadd"; // Various FP instructions based on ModR/M - OneByteOpcodes[0xD9] = "fld"; // Various FP instructions based on ModR/M - OneByteOpcodes[0xDA] = "fiadd"; // Various FP instructions based on ModR/M - OneByteOpcodes[0xDB] = "fild"; // Various FP instructions based on ModR/M - OneByteOpcodes[0xDC] = "fadd"; // Various FP instructions based on ModR/M - OneByteOpcodes[0xDD] = "fld"; // Various FP instructions based on ModR/M - OneByteOpcodes[0xDE] = "fiadd"; // Various FP instructions based on ModR/M - OneByteOpcodes[0xDF] = "fistp"; // Various FP instructions based on ModR/M - - // Group 1 instructions (ADD, OR, ADC, SBB, AND, SUB, XOR, CMP) - OneByteOpcodes[0x80] = "group1b"; - OneByteOpcodes[0x81] = "group1d"; - OneByteOpcodes[0x83] = "group1s"; // Sign-extended immediate - - // Data transfer instructions - for (int i = 0x88; i <= 0x8B; i++) - { - OneByteOpcodes[i] = "mov"; - } - OneByteOpcodes[0xA0] = "mov"; // MOV AL, moffs8 - OneByteOpcodes[0xA1] = "mov"; // MOV EAX, moffs32 - OneByteOpcodes[0xA2] = "mov"; // MOV moffs8, AL - OneByteOpcodes[0xA3] = "mov"; // MOV moffs32, EAX - for (int i = 0xB0; i <= 0xB7; i++) - { - OneByteOpcodes[i] = "mov"; // MOV r8, imm8 - } - for (int i = 0xB8; i <= 0xBF; i++) - { - OneByteOpcodes[i] = "mov"; // MOV r32, imm32 - } - OneByteOpcodes[0xC6] = "mov"; // MOV r/m8, imm8 - OneByteOpcodes[0xC7] = "mov"; // MOV r/m32, imm32 - - // Push/Pop instructions - for (int i = 0x50; i <= 0x57; i++) - { - 
OneByteOpcodes[i] = "push"; // PUSH r32 - } - for (int i = 0x58; i <= 0x5F; i++) - { - OneByteOpcodes[i] = "pop"; // POP r32 - } - OneByteOpcodes[0x68] = "push"; // PUSH imm32 - OneByteOpcodes[0x6A] = "push"; // PUSH imm8 - OneByteOpcodes[0x8F] = "pop"; // POP r/m32 - OneByteOpcodes[0x9C] = "pushf"; // PUSHF - OneByteOpcodes[0x9D] = "popf"; // POPF - - // Arithmetic instructions - for (int i = 0x00; i <= 0x05; i++) - { - OneByteOpcodes[i] = "add"; - } - for (int i = 0x28; i <= 0x2D; i++) - { - OneByteOpcodes[i] = "sub"; - } - for (int i = 0x30; i <= 0x35; i++) - { - OneByteOpcodes[i] = "xor"; - } - for (int i = 0x38; i <= 0x3D; i++) - { - OneByteOpcodes[i] = "cmp"; - } - OneByteOpcodes[0x40] = "inc"; // INC eax - OneByteOpcodes[0x41] = "inc"; // INC ecx - OneByteOpcodes[0x42] = "inc"; // INC edx - OneByteOpcodes[0x43] = "inc"; // INC ebx - OneByteOpcodes[0x44] = "inc"; // INC esp - OneByteOpcodes[0x45] = "inc"; // INC ebp - OneByteOpcodes[0x46] = "inc"; // INC esi - OneByteOpcodes[0x47] = "inc"; // INC edi - OneByteOpcodes[0x48] = "dec"; // DEC eax - OneByteOpcodes[0x49] = "dec"; // DEC ecx - OneByteOpcodes[0x4A] = "dec"; // DEC edx - OneByteOpcodes[0x4B] = "dec"; // DEC ebx - OneByteOpcodes[0x4C] = "dec"; // DEC esp - OneByteOpcodes[0x4D] = "dec"; // DEC ebp - OneByteOpcodes[0x4E] = "dec"; // DEC esi - OneByteOpcodes[0x4F] = "dec"; // DEC edi - - // Logical instructions - for (int i = 0x20; i <= 0x25; i++) - { - OneByteOpcodes[i] = "and"; - } - for (int i = 0x08; i <= 0x0D; i++) - { - OneByteOpcodes[i] = "or"; - } - OneByteOpcodes[0xF7] = "not"; // Group 3 - NOT, NEG, MUL, IMUL, DIV, IDIV - - // Shift and rotate instructions - OneByteOpcodes[0xD0] = "rol"; // Group 2 - ROL, ROR, RCL, RCR, SHL/SAL, SHR, SAR - OneByteOpcodes[0xD1] = "rol"; // Group 2 - ROL, ROR, RCL, RCR, SHL/SAL, SHR, SAR - OneByteOpcodes[0xD2] = "rol"; // Group 2 - ROL, ROR, RCL, RCR, SHL/SAL, SHR, SAR - OneByteOpcodes[0xD3] = "rol"; // Group 2 - ROL, ROR, RCL, RCR, SHL/SAL, SHR, SAR - - // 
Control flow instructions - OneByteOpcodes[0xC3] = "ret"; - OneByteOpcodes[0xC2] = "ret"; - OneByteOpcodes[0xCA] = "retf"; - OneByteOpcodes[0xCB] = "retf"; - OneByteOpcodes[0xCC] = "int3"; - OneByteOpcodes[0xCD] = "int"; - OneByteOpcodes[0xCE] = "into"; - OneByteOpcodes[0xCF] = "iret"; - OneByteOpcodes[0xE8] = "call"; - OneByteOpcodes[0xE9] = "jmp"; - OneByteOpcodes[0xEB] = "jmp"; - OneByteOpcodes[0xFF] = "call"; // Group 5 - CALL, JMP, PUSH - - // Conditional jumps - for (int i = 0x70; i <= 0x7F; i++) - { - OneByteOpcodes[i] = "j" + ConditionCodes[i - 0x70]; - } - - // String instructions - OneByteOpcodes[0xA4] = "movsb"; - OneByteOpcodes[0xA5] = "movsd"; - OneByteOpcodes[0xA6] = "cmpsb"; - OneByteOpcodes[0xA7] = "cmpsd"; - OneByteOpcodes[0xAA] = "stosb"; - OneByteOpcodes[0xAB] = "stosd"; - OneByteOpcodes[0xAC] = "lodsb"; - OneByteOpcodes[0xAD] = "lodsd"; - OneByteOpcodes[0xAE] = "scasb"; - OneByteOpcodes[0xAF] = "scasd"; - - // Misc instructions - OneByteOpcodes[0x90] = "nop"; - OneByteOpcodes[0x91] = "xchg"; // XCHG eax, ecx - OneByteOpcodes[0x92] = "xchg"; // XCHG eax, edx - OneByteOpcodes[0x93] = "xchg"; // XCHG eax, ebx - OneByteOpcodes[0x94] = "xchg"; // XCHG eax, esp - OneByteOpcodes[0x95] = "xchg"; // XCHG eax, ebp - OneByteOpcodes[0x96] = "xchg"; // XCHG eax, esi - OneByteOpcodes[0x97] = "xchg"; // XCHG eax, edi - OneByteOpcodes[0x98] = "cwde"; - OneByteOpcodes[0x99] = "cdq"; - OneByteOpcodes[0xF4] = "hlt"; - OneByteOpcodes[0xF5] = "cmc"; - OneByteOpcodes[0xF8] = "clc"; - OneByteOpcodes[0xF9] = "stc"; - OneByteOpcodes[0xFA] = "cli"; - OneByteOpcodes[0xFB] = "sti"; - OneByteOpcodes[0xFC] = "cld"; - OneByteOpcodes[0xFD] = "std"; - } + // List of instruction handlers + private readonly List _handlers; /// /// Initializes a new instance of the InstructionDecoder class @@ -247,18 +41,15 @@ public class InstructionDecoder _codeBuffer = codeBuffer; _position = 0; _length = codeBuffer.Length; - } - - /// - /// Decodes an instruction at the specified position in 
the code buffer - /// - /// The position in the code buffer - /// The instruction object to populate - /// The number of bytes read - public int DecodeAt(int position, Instruction instruction) - { - _position = position; - return Decode(instruction); + + // Initialize the instruction handlers + _handlers = new List + { + new Group1Handler(_codeBuffer, this, _length), + new FloatingPointHandler(_codeBuffer, this, _length), + new DataTransferHandler(_codeBuffer, this, _length), + new ControlFlowHandler(_codeBuffer, this, _length) + }; } /// @@ -350,238 +141,27 @@ public class InstructionDecoder // Read the opcode byte opcode = _codeBuffer[_position++]; - // Get the mnemonic from the opcode map - string mnemonic = OneByteOpcodes[opcode]; - - // Handle specific opcodes - string operands = string.Empty; - - switch (opcode) + // Try to find a handler for this opcode + bool handled = false; + foreach (var handler in _handlers) { - case 0xDF: // FISTP and other FPU instructions - if (_position < _length) + if (handler.CanHandle(opcode)) + { + handled = handler.Decode(opcode, instruction); + if (handled) { - byte modRM = _codeBuffer[_position++]; - byte mod = (byte)((modRM & MODRM_MOD_MASK) >> 6); - byte reg = (byte)((modRM & MODRM_REG_MASK) >> 3); - byte rm = (byte)(modRM & MODRM_RM_MASK); - - // FISTP with memory operand - if (reg == 7) // FISTP - { - if (mod == 0 && rm == 5) // Displacement only addressing - { - if (_position + 4 <= _length) - { - uint disp32 = BitConverter.ToUInt32(_codeBuffer, _position); - _position += 4; - operands = $"qword ptr [0x{disp32:X8}]"; - } - } - else - { - // Handle other addressing modes if needed - operands = DecodeModRM(mod, rm, true); - } - } + break; } - break; - - case 0xA1: // MOV EAX, memory - if (_position + 4 <= _length) - { - uint addr = BitConverter.ToUInt32(_codeBuffer, _position); - _position += 4; - operands = $"eax, [0x{addr:X8}]"; - } - break; - - case OPCODE_INT3: - // No operands for INT3 - break; - - case OPCODE_NOP: - 
// No operands for NOP - break; - - case OPCODE_RET: - // No operands for RET - break; - - case OPCODE_CALL_NEAR_RELATIVE: - if (_position + 4 <= _length) - { - // Read 32-bit relative offset - int offset = BitConverter.ToInt32(_codeBuffer, _position); - _position += 4; - - // Calculate target address (relative to next instruction) - uint targetAddress = (uint)(_position + offset); - operands = $"0x{targetAddress:X8}"; - } - break; - - case OPCODE_JMP_NEAR_RELATIVE: - if (_position + 4 <= _length) - { - // Read 32-bit relative offset - int offset = BitConverter.ToInt32(_codeBuffer, _position); - _position += 4; - - // Calculate target address (relative to next instruction) - uint targetAddress = (uint)(_position + offset); - operands = $"0x{targetAddress:X8}"; - } - break; - - case OPCODE_JMP_SHORT_RELATIVE: - if (_position < _length) - { - // Read 8-bit relative offset - sbyte offset = (sbyte)_codeBuffer[_position++]; - - // Calculate target address (relative to next instruction) - uint targetAddress = (uint)(_position + offset); - operands = $"0x{targetAddress:X8}"; - } - break; - - case 0x83: // Group 1 with sign-extended immediate byte - if (_position < _length) - { - byte modRM = _codeBuffer[_position++]; - byte mod = (byte)((modRM & MODRM_MOD_MASK) >> 6); - byte reg = (byte)((modRM & MODRM_REG_MASK) >> 3); // This is the operation type - byte rm = (byte)(modRM & MODRM_RM_MASK); - - // Determine the operation based on reg field - string[] group1Ops = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" }; - mnemonic = group1Ops[reg]; - - // Decode the destination operand - string destOperand; - if (mod == 3) // Register operand - { - destOperand = RegisterNames32[rm]; - } - else // Memory operand - { - destOperand = DecodeModRM(mod, rm, false); - } - - // Read the immediate byte - if (_position < _length) - { - sbyte imm8 = (sbyte)_codeBuffer[_position++]; - operands = $"{destOperand}, 0x{imm8:X2}"; - } - else - { - operands = $"{destOperand}, ???"; - } - } 
- break; - - default: - // Handle register-based instructions - if (opcode >= 0x40 && opcode <= 0x47) // INC r32 - { - int reg = opcode - 0x40; - operands = RegisterNames32[reg]; - } - else if (opcode >= 0x48 && opcode <= 0x4F) // DEC r32 - { - int reg = opcode - 0x48; - operands = RegisterNames32[reg]; - } - else if (opcode >= 0x50 && opcode <= 0x57) // PUSH r32 - { - int reg = opcode - 0x50; - operands = RegisterNames32[reg]; - } - else if (opcode >= 0x58 && opcode <= 0x5F) // POP r32 - { - int reg = opcode - 0x58; - operands = RegisterNames32[reg]; - } - else if (opcode >= 0x91 && opcode <= 0x97) // XCHG eax, r32 - { - int reg = opcode - 0x90; - operands = $"eax, {RegisterNames32[reg]}"; - } - else if (opcode >= 0xB0 && opcode <= 0xB7) // MOV r8, imm8 - { - if (_position < _length) - { - int reg = opcode - 0xB0; - byte imm8 = _codeBuffer[_position++]; - operands = $"{RegisterNames8[reg]}, 0x{imm8:X2}"; - } - } - else if (opcode >= 0xB8 && opcode <= 0xBF) // MOV r32, imm32 - { - if (_position + 4 <= _length) - { - int reg = opcode - 0xB8; - uint imm32 = BitConverter.ToUInt32(_codeBuffer, _position); - _position += 4; - operands = $"{RegisterNames32[reg]}, 0x{imm32:X8}"; - } - } - else if (opcode >= 0x70 && opcode <= 0x7F) // Conditional jumps (short) - { - if (_position < _length) - { - sbyte offset = (sbyte)_codeBuffer[_position++]; - uint targetAddress = (uint)(_position + offset); - operands = $"0x{targetAddress:X8}"; - } - } - else if (opcode == 0x68) // PUSH imm32 - { - if (_position + 4 <= _length) - { - uint imm32 = BitConverter.ToUInt32(_codeBuffer, _position); - _position += 4; - operands = $"0x{imm32:X8}"; - } - } - else if (opcode == 0x6A) // PUSH imm8 - { - if (_position < _length) - { - byte imm8 = _codeBuffer[_position++]; - operands = $"0x{imm8:X2}"; - } - } - else if (opcode == 0xCD) // INT imm8 - { - if (_position < _length) - { - byte imm8 = _codeBuffer[_position++]; - operands = $"0x{imm8:X2}"; - } - } - else if (opcode == 0xE3) // JECXZ rel8 - 
{ - if (_position < _length) - { - sbyte offset = (sbyte)_codeBuffer[_position++]; - uint targetAddress = (uint)(_position + offset); - operands = $"0x{targetAddress:X8}"; - } - } - else - { - // For other opcodes, we'll just show the raw bytes for now - // In a full implementation, we would decode the ModR/M byte, SIB byte, etc. - } - break; + } } - // Set the instruction properties - instruction.Mnemonic = mnemonic; - instruction.Operands = operands; + // If no handler was found or the instruction couldn't be decoded, + // use a default mnemonic from the opcode map + if (!handled) + { + instruction.Mnemonic = OpcodeMap.GetMnemonic(opcode); + instruction.Operands = string.Empty; + } // Copy the instruction bytes int bytesRead = _position - startPosition; @@ -592,100 +172,32 @@ public class InstructionDecoder } /// - /// Decodes a ModR/M byte to get the operand string + /// Sets the current position in the code buffer /// - /// The mod field (2 bits) - /// The r/m field (3 bits) - /// True if the operand is 64-bit - /// The operand string - private string DecodeModRM(byte mod, byte rm, bool is64Bit) + /// The new position + public void SetPosition(int position) { - string sizePrefix = is64Bit ? 
"qword" : "dword"; - - switch (mod) - { - case 0: // [reg] or disp32 - if (rm == 5) // disp32 - { - if (_position + 4 <= _length) - { - uint disp32 = BitConverter.ToUInt32(_codeBuffer, _position); - _position += 4; - return $"{sizePrefix} ptr [0x{disp32:X8}]"; - } - return $"{sizePrefix} ptr [???]"; - } - else if (rm == 4) // SIB - { - // Handle SIB byte - if (_position < _length) - { - byte sib = _codeBuffer[_position++]; - // Decode SIB byte (not implemented yet) - return $"{sizePrefix} ptr [SIB]"; - } - return $"{sizePrefix} ptr [???]"; - } - else - { - return $"{sizePrefix} ptr [{RegisterNames32[rm]}]"; - } - - case 1: // [reg + disp8] - if (rm == 4) // SIB + disp8 - { - // Handle SIB byte - if (_position + 1 < _length) - { - byte sib = _codeBuffer[_position++]; - sbyte disp8 = (sbyte)_codeBuffer[_position++]; - // Decode SIB byte (not implemented yet) - return $"{sizePrefix} ptr [SIB+0x{disp8:X2}]"; - } - return $"{sizePrefix} ptr [???]"; - } - else - { - if (_position < _length) - { - sbyte disp8 = (sbyte)_codeBuffer[_position++]; - string dispStr = disp8 < 0 ? $"-0x{-disp8:X2}" : $"+0x{disp8:X2}"; - return $"{sizePrefix} ptr [{RegisterNames32[rm]}{dispStr}]"; - } - return $"{sizePrefix} ptr [{RegisterNames32[rm]}+???]"; - } - - case 2: // [reg + disp32] - if (rm == 4) // SIB + disp32 - { - // Handle SIB byte - if (_position + 4 < _length) - { - byte sib = _codeBuffer[_position++]; - int disp32 = BitConverter.ToInt32(_codeBuffer, _position); - _position += 4; - // Decode SIB byte (not implemented yet) - return $"{sizePrefix} ptr [SIB+0x{disp32:X8}]"; - } - return $"{sizePrefix} ptr [???]"; - } - else - { - if (_position + 4 <= _length) - { - int disp32 = BitConverter.ToInt32(_codeBuffer, _position); - _position += 4; - string dispStr = disp32 < 0 ? $"-0x{-disp32:X8}" : $"+0x{disp32:X8}"; - return $"{sizePrefix} ptr [{RegisterNames32[rm]}{dispStr}]"; - } - return $"{sizePrefix} ptr [{RegisterNames32[rm]}+???]"; - } - - case 3: // reg - return is64Bit ? 
"mm" + rm : RegisterNames32[rm]; - - default: - return "???"; - } + _position = position; + } + + /// + /// Gets the current position in the code buffer + /// + /// The current position + public int GetPosition() + { + return _position; + } + + /// + /// Decodes an instruction at the specified position in the code buffer + /// + /// The position in the code buffer + /// The instruction object to populate + /// The number of bytes read + public int DecodeAt(int position, Instruction instruction) + { + _position = position; + return Decode(instruction); } } diff --git a/X86Disassembler/X86/ModRMDecoder.cs b/X86Disassembler/X86/ModRMDecoder.cs new file mode 100644 index 0000000..dcf3ba5 --- /dev/null +++ b/X86Disassembler/X86/ModRMDecoder.cs @@ -0,0 +1,242 @@ +namespace X86Disassembler.X86; + +/// +/// Handles decoding of ModR/M bytes in x86 instructions +/// +public class ModRMDecoder +{ + // ModR/M byte masks + private const byte MOD_MASK = 0xC0; // 11000000b + private const byte REG_MASK = 0x38; // 00111000b + private const byte RM_MASK = 0x07; // 00000111b + + // SIB byte masks + private const byte SIB_SCALE_MASK = 0xC0; // 11000000b + private const byte SIB_INDEX_MASK = 0x38; // 00111000b + private const byte SIB_BASE_MASK = 0x07; // 00000111b + + // Register names + private static readonly string[] RegisterNames8 = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh" }; + private static readonly string[] RegisterNames16 = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di" }; + private static readonly string[] RegisterNames32 = { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi" }; + + // Buffer containing the code to decode + private readonly byte[] _codeBuffer; + + // The instruction decoder that owns this ModRM decoder + private readonly InstructionDecoder _decoder; + + // Length of the buffer + private readonly int _length; + + /// + /// Initializes a new instance of the ModRMDecoder class + /// + /// The buffer containing the code to decode + /// The 
instruction decoder that owns this ModRM decoder + /// The length of the buffer + public ModRMDecoder(byte[] codeBuffer, InstructionDecoder decoder, int length) + { + _codeBuffer = codeBuffer; + _decoder = decoder; + _length = length; + } + + /// + /// Decodes a ModR/M byte to get the operand string + /// + /// The mod field (2 bits) + /// The r/m field (3 bits) + /// True if the operand is 64-bit + /// The operand string + public string DecodeModRM(byte mod, byte rm, bool is64Bit) + { + string sizePrefix = is64Bit ? "qword" : "dword"; + int position = _decoder.GetPosition(); + + switch (mod) + { + case 0: // [reg] or disp32 + if (rm == 5) // disp32 + { + if (position + 4 <= _length) + { + uint disp32 = BitConverter.ToUInt32(_codeBuffer, position); + _decoder.SetPosition(position + 4); + return $"{sizePrefix} ptr [0x{disp32:X8}]"; + } + return $"{sizePrefix} ptr [???]"; + } + else if (rm == 4) // SIB + { + // Handle SIB byte + if (position < _length) + { + byte sib = _codeBuffer[position]; + _decoder.SetPosition(position + 1); + return DecodeSIB(sib, 0, is64Bit); + } + return $"{sizePrefix} ptr [???]"; + } + else + { + return $"{sizePrefix} ptr [{RegisterNames32[rm]}]"; + } + + case 1: // [reg + disp8] + if (rm == 4) // SIB + disp8 + { + // Handle SIB byte + if (position + 1 < _length) + { + byte sib = _codeBuffer[position]; + sbyte disp8 = (sbyte)_codeBuffer[position + 1]; + _decoder.SetPosition(position + 2); + return DecodeSIB(sib, disp8, is64Bit); + } + return $"{sizePrefix} ptr [???]"; + } + else + { + if (position < _length) + { + sbyte disp8 = (sbyte)_codeBuffer[position]; + _decoder.SetPosition(position + 1); + string dispStr8 = disp8 < 0 ? 
$"-0x{-disp8:X2}" : $"+0x{disp8:X2}"; + return $"{sizePrefix} ptr [{RegisterNames32[rm]}{dispStr8}]"; + } + return $"{sizePrefix} ptr [{RegisterNames32[rm]}+???]"; + } + + case 2: // [reg + disp32] + if (rm == 4) // SIB + disp32 + { + // Handle SIB byte + if (position + 4 < _length) + { + byte sib = _codeBuffer[position]; + int disp32 = BitConverter.ToInt32(_codeBuffer, position + 1); + _decoder.SetPosition(position + 5); + return DecodeSIB(sib, disp32, is64Bit); + } + return $"{sizePrefix} ptr [???]"; + } + else + { + if (position + 4 <= _length) + { + int disp32 = BitConverter.ToInt32(_codeBuffer, position); + _decoder.SetPosition(position + 4); + string dispStr32 = disp32 < 0 ? $"-0x{-disp32:X8}" : $"+0x{disp32:X8}"; + return $"{sizePrefix} ptr [{RegisterNames32[rm]}{dispStr32}]"; + } + return $"{sizePrefix} ptr [{RegisterNames32[rm]}+???]"; + } + + case 3: // reg + return is64Bit ? "mm" + rm : RegisterNames32[rm]; + + default: + return "???"; + } + } + + /// + /// Reads and decodes a ModR/M byte + /// + /// True if the operand is 64-bit + /// A tuple containing the mod, reg, rm fields and the decoded operand string + public (byte mod, byte reg, byte rm, string operand) ReadModRM(bool is64Bit = false) + { + int position = _decoder.GetPosition(); + + if (position >= _length) + { + return (0, 0, 0, "???"); + } + + byte modRM = _codeBuffer[position]; + _decoder.SetPosition(position + 1); + + byte mod = (byte)((modRM & MOD_MASK) >> 6); + byte reg = (byte)((modRM & REG_MASK) >> 3); + byte rm = (byte)(modRM & RM_MASK); + + string operand = DecodeModRM(mod, rm, is64Bit); + + return (mod, reg, rm, operand); + } + + /// + /// Decodes a SIB byte + /// + /// The SIB byte + /// The displacement value + /// True if the operand is 64-bit + /// The decoded SIB string + private string DecodeSIB(byte sib, int displacement, bool is64Bit) + { + string sizePrefix = is64Bit ? 
"qword" : "dword"; + int position = _decoder.GetPosition(); + + byte scale = (byte)((sib & SIB_SCALE_MASK) >> 6); + byte index = (byte)((sib & SIB_INDEX_MASK) >> 3); + byte @base = (byte)(sib & SIB_BASE_MASK); + + // Special case: no index register + if (index == 4) + { + if (@base == 5 && displacement == 0) // Special case: disp32 only + { + if (position + 4 <= _length) + { + uint disp32 = BitConverter.ToUInt32(_codeBuffer, position); + _decoder.SetPosition(position + 4); + return $"{sizePrefix} ptr [0x{disp32:X8}]"; + } + return $"{sizePrefix} ptr [???]"; + } + else + { + string baseDispStr = ""; + if (displacement != 0) + { + baseDispStr = displacement < 0 ? + $"-0x{-displacement:X}" : + $"+0x{displacement:X}"; + } + return $"{sizePrefix} ptr [{RegisterNames32[@base]}{baseDispStr}]"; + } + } + + // Normal case with index register + int scaleFactor = 1 << scale; // 1, 2, 4, or 8 + string scaleStr = scaleFactor > 1 ? $"*{scaleFactor}" : ""; + + string indexDispStr = ""; + if (displacement != 0) + { + indexDispStr = displacement < 0 ? 
+ $"-0x{-displacement:X}" : + $"+0x{displacement:X}"; + } + + return $"{sizePrefix} ptr [{RegisterNames32[@base]}+{RegisterNames32[index]}{scaleStr}{indexDispStr}]"; + } + + /// + /// Gets the register name based on the register index and size + /// + /// The register index + /// The register size (8, 16, or 32 bits) + /// The register name + public static string GetRegisterName(int index, int size) + { + return size switch + { + 8 => RegisterNames8[index], + 16 => RegisterNames16[index], + _ => RegisterNames32[index] + }; + } +} diff --git a/X86Disassembler/X86/OpcodeMap.cs b/X86Disassembler/X86/OpcodeMap.cs new file mode 100644 index 0000000..9a0ee44 --- /dev/null +++ b/X86Disassembler/X86/OpcodeMap.cs @@ -0,0 +1,137 @@ +namespace X86Disassembler.X86; + +/// +/// Provides mapping between opcodes and their mnemonics +/// +public class OpcodeMap +{ + // One-byte opcode map + private static readonly string[] OneByteOpcodes = new string[256]; + + // Condition codes for conditional jumps + private static readonly string[] ConditionCodes = { + "o", "no", "b", "ae", "e", "ne", "be", "a", + "s", "ns", "p", "np", "l", "ge", "le", "g" + }; + + // Group 1 operations (used with opcodes 0x80, 0x81, 0x83) + public static readonly string[] Group1Operations = { + "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" + }; + + // Static constructor to initialize the opcode maps + static OpcodeMap() + { + InitializeOpcodeMaps(); + } + + /// + /// Initializes the opcode maps + /// + private static void InitializeOpcodeMaps() + { + // Initialize all entries to "??" 
(unknown) + for (int i = 0; i < 256; i++) + { + OneByteOpcodes[i] = "??"; + } + + // Floating-point instructions + OneByteOpcodes[0xD8] = "fadd"; + OneByteOpcodes[0xD9] = "fld"; + OneByteOpcodes[0xDA] = "fiadd"; + OneByteOpcodes[0xDB] = "fild"; + OneByteOpcodes[0xDC] = "fadd"; + OneByteOpcodes[0xDD] = "fld"; + OneByteOpcodes[0xDE] = "fiadd"; + OneByteOpcodes[0xDF] = "fistp"; + + // Group 1 instructions (ADD, OR, ADC, SBB, AND, SUB, XOR, CMP) + OneByteOpcodes[0x80] = "group1b"; + OneByteOpcodes[0x81] = "group1d"; + OneByteOpcodes[0x83] = "group1s"; // Sign-extended immediate + + // Data transfer instructions + for (int i = 0x88; i <= 0x8B; i++) + { + OneByteOpcodes[i] = "mov"; + } + OneByteOpcodes[0xA0] = "mov"; // MOV AL, moffs8 + OneByteOpcodes[0xA1] = "mov"; // MOV EAX, moffs32 + OneByteOpcodes[0xA2] = "mov"; // MOV moffs8, AL + OneByteOpcodes[0xA3] = "mov"; // MOV moffs32, EAX + + // Control flow instructions + OneByteOpcodes[0xCC] = "int3"; + OneByteOpcodes[0x90] = "nop"; + OneByteOpcodes[0xC3] = "ret"; + OneByteOpcodes[0xE8] = "call"; + OneByteOpcodes[0xE9] = "jmp"; + OneByteOpcodes[0xEB] = "jmp"; + + // Register operations + for (int i = 0; i <= 7; i++) + { + OneByteOpcodes[0x40 + i] = "inc"; + OneByteOpcodes[0x48 + i] = "dec"; + OneByteOpcodes[0x50 + i] = "push"; + OneByteOpcodes[0x58 + i] = "pop"; + } + + // XCHG instructions + OneByteOpcodes[0x90] = "nop"; // Special case: XCHG eax, eax = NOP + for (int i = 1; i <= 7; i++) + { + OneByteOpcodes[0x90 + i] = "xchg"; + } + + // MOV instructions + for (int i = 0; i <= 7; i++) + { + OneByteOpcodes[0xB0 + i] = "mov"; // MOV r8, imm8 + OneByteOpcodes[0xB8 + i] = "mov"; // MOV r32, imm32 + } + + // Conditional jumps + for (int i = 0; i <= 0xF; i++) + { + OneByteOpcodes[0x70 + i] = "j" + ConditionCodes[i]; + } + + // Other common instructions + OneByteOpcodes[0x68] = "push"; // PUSH imm32 + OneByteOpcodes[0x6A] = "push"; // PUSH imm8 + OneByteOpcodes[0xCD] = "int"; // INT imm8 + OneByteOpcodes[0xE3] = "jecxz"; // 
JECXZ rel8 + } + + /// + /// Gets the mnemonic for a one-byte opcode + /// + /// The opcode + /// The mnemonic + public static string GetMnemonic(byte opcode) + { + return OneByteOpcodes[opcode]; + } + + /// + /// Checks if the opcode is a Group 1 opcode + /// + /// The opcode to check + /// True if the opcode is a Group 1 opcode + public static bool IsGroup1Opcode(byte opcode) + { + return opcode == 0x80 || opcode == 0x81 || opcode == 0x83; + } + + /// + /// Checks if the opcode is a floating-point instruction + /// + /// The opcode to check + /// True if the opcode is a floating-point instruction + public static bool IsFloatingPointOpcode(byte opcode) + { + return opcode >= 0xD8 && opcode <= 0xDF; + } +}