0
mirror of https://github.com/sampletext32/ParkanPlayground.git synced 2025-05-19 11:51:17 +03:00

Refactored instruction decoder into smaller, more maintainable components using handler pattern

This commit is contained in:
bird_egop 2025-04-12 19:18:52 +03:00
parent 2e6e133159
commit dffc405c10
8 changed files with 1361 additions and 544 deletions

View File

@ -0,0 +1,281 @@
namespace X86Disassembler.X86.Handlers;
/// <summary>
/// Handler for control flow instructions (RET, CALL, JMP, short conditional jumps, INT3/INT, JECXZ)
/// </summary>
public class ControlFlowHandler : InstructionHandler
{
    // Condition codes for conditional jumps, listed in opcode order (0x70..0x7F).
    // Not referenced inside this class at present: mnemonics are taken from OpcodeMap.GetMnemonic.
    private static readonly string[] ConditionCodes = {
        "o", "no", "b", "ae", "e", "ne", "be", "a",
        "s", "ns", "p", "np", "l", "ge", "le", "g"
    };

    /// <summary>
    /// Initializes a new instance of the ControlFlowHandler class
    /// </summary>
    /// <param name="codeBuffer">The buffer containing the code to decode</param>
    /// <param name="decoder">The instruction decoder that owns this handler</param>
    /// <param name="length">The length of the buffer</param>
    public ControlFlowHandler(byte[] codeBuffer, InstructionDecoder decoder, int length)
        : base(codeBuffer, decoder, length)
    {
    }

    /// <summary>
    /// Checks if this handler can decode the given opcode
    /// </summary>
    /// <param name="opcode">The opcode to check</param>
    /// <returns>True if this handler can decode the opcode</returns>
    public override bool CanHandle(byte opcode)
    {
        switch (opcode)
        {
            case 0xC2: // RET imm16
            case 0xC3: // RET
            case 0xE8: // CALL rel32
            case 0xE9: // JMP rel32
            case 0xEB: // JMP rel8
            case 0xCC: // INT3
            case 0xCD: // INT imm8
            case 0xE3: // JECXZ rel8
            case >= 0x70 and <= 0x7F: // Conditional jumps (rel8)
                return true;
            default:
                return false;
        }
    }

    /// <summary>
    /// Decodes a control flow instruction
    /// </summary>
    /// <param name="opcode">The opcode of the instruction</param>
    /// <param name="instruction">The instruction object to populate</param>
    /// <returns>True if the instruction was successfully decoded; false if the opcode is not
    /// supported or the operand bytes run past the end of the buffer</returns>
    public override bool Decode(byte opcode, Instruction instruction)
    {
        // Set the mnemonic based on the opcode
        instruction.Mnemonic = OpcodeMap.GetMnemonic(opcode);

        switch (opcode)
        {
            case 0xC3: // RET
            case 0xCC: // INT3
                // These forms carry no operands
                instruction.Operands = string.Empty;
                return true;

            case 0xC2: // RET imm16
                return DecodeRETImm16(instruction);

            case 0xE8: // CALL rel32
            case 0xE9: // JMP rel32
                return DecodeRelativeTarget(instruction, 4);

            case 0xEB: // JMP rel8
            case 0xE3: // JECXZ rel8
            case >= 0x70 and <= 0x7F: // Conditional jumps (rel8)
                return DecodeRelativeTarget(instruction, 1);

            case 0xCD: // INT imm8
                return DecodeINTImm8(instruction);

            default:
                return false;
        }
    }

    /// <summary>
    /// Decodes a RET instruction with 16-bit immediate operand
    /// </summary>
    private bool DecodeRETImm16(Instruction instruction)
    {
        int position = Decoder.GetPosition();
        if (position + 2 > Length)
        {
            return false;
        }

        // Read the immediate value
        ushort imm16 = BitConverter.ToUInt16(CodeBuffer, position);
        Decoder.SetPosition(position + 2);

        instruction.Operands = $"0x{imm16:X4}";
        return true;
    }

    /// <summary>
    /// Decodes the relative displacement of a CALL/JMP/Jcc/JECXZ instruction and stores
    /// the resulting target as the operand
    /// </summary>
    /// <param name="instruction">The instruction object to populate</param>
    /// <param name="displacementSize">Size of the displacement in bytes: 1 (rel8) or 4 (rel32)</param>
    /// <returns>True if the displacement was read; false if it does not fit in the buffer</returns>
    private bool DecodeRelativeTarget(Instruction instruction, int displacementSize)
    {
        int position = Decoder.GetPosition();
        if (position + displacementSize > Length)
        {
            return false;
        }

        // Read the signed displacement
        int displacement = displacementSize == 1
            ? (sbyte)CodeBuffer[position]
            : BitConverter.ToInt32(CodeBuffer, position);

        // The displacement is relative to the first byte after it (the next instruction),
        // so the displacement's own size must be included for rel32 as well as rel8.
        int nextInstruction = position + displacementSize;
        Decoder.SetPosition(nextInstruction);

        // Backward jumps can go below zero; wrap instead of throwing in checked builds
        uint targetAddress = unchecked((uint)(nextInstruction + displacement));
        instruction.Operands = $"0x{targetAddress:X8}";
        return true;
    }

    /// <summary>
    /// Decodes an INT instruction with 8-bit immediate operand
    /// </summary>
    private bool DecodeINTImm8(Instruction instruction)
    {
        int position = Decoder.GetPosition();
        if (position >= Length)
        {
            return false;
        }

        // Read the immediate value
        byte imm8 = CodeBuffer[position];
        Decoder.SetPosition(position + 1);

        instruction.Operands = $"0x{imm8:X2}";
        return true;
    }
}

View File

@ -0,0 +1,326 @@
namespace X86Disassembler.X86.Handlers;
/// <summary>
/// Handler for data transfer instructions (MOV, PUSH, POP, XCHG EAX/NOP)
/// </summary>
public class DataTransferHandler : InstructionHandler
{
    /// <summary>
    /// Initializes a new instance of the DataTransferHandler class
    /// </summary>
    /// <param name="codeBuffer">The buffer containing the code to decode</param>
    /// <param name="decoder">The instruction decoder that owns this handler</param>
    /// <param name="length">The length of the buffer</param>
    public DataTransferHandler(byte[] codeBuffer, InstructionDecoder decoder, int length)
        : base(codeBuffer, decoder, length)
    {
    }

    /// <summary>
    /// Checks if this handler can decode the given opcode
    /// </summary>
    /// <param name="opcode">The opcode to check</param>
    /// <returns>True if this handler can decode the opcode</returns>
    public override bool CanHandle(byte opcode)
    {
        switch (opcode)
        {
            case >= 0x88 and <= 0x8B: // MOV r/m, r and MOV r, r/m
            case >= 0xB0 and <= 0xB7: // MOV r8, imm8
            case >= 0xB8 and <= 0xBF: // MOV r32, imm32
            case >= 0xA0 and <= 0xA3: // MOV AL/EAX, moffs and MOV moffs, AL/EAX
            case >= 0x50 and <= 0x57: // PUSH r32
            case 0x68:                // PUSH imm32
            case 0x6A:                // PUSH imm8
            case >= 0x58 and <= 0x5F: // POP r32
            case >= 0x90 and <= 0x97: // NOP / XCHG EAX, r32
                return true;
            default:
                return false;
        }
    }

    /// <summary>
    /// Decodes a data transfer instruction
    /// </summary>
    /// <param name="opcode">The opcode of the instruction</param>
    /// <param name="instruction">The instruction object to populate</param>
    /// <returns>True if the instruction was successfully decoded; false if the opcode is not
    /// supported or the operand bytes run past the end of the buffer</returns>
    public override bool Decode(byte opcode, Instruction instruction)
    {
        // Set the mnemonic based on the opcode
        instruction.Mnemonic = OpcodeMap.GetMnemonic(opcode);

        switch (opcode)
        {
            case >= 0x88 and <= 0x8B: // MOV r/m, r and MOV r, r/m
                return DecodeMOVRegMem(opcode, instruction);

            case >= 0xB0 and <= 0xB7: // MOV r8, imm8
                return DecodeMOVRegImm8(opcode, instruction);

            case >= 0xB8 and <= 0xBF: // MOV r32, imm32
                return DecodeMOVRegImm32(opcode, instruction);

            case >= 0xA0 and <= 0xA3: // MOV AL/EAX, moffs and MOV moffs, AL/EAX
                return DecodeMOVMoffs(opcode, instruction);

            case >= 0x50 and <= 0x5F: // PUSH r32 (50-57) and POP r32 (58-5F)
                return DecodeOpcodeReg32(opcode, instruction);

            case 0x68: // PUSH imm32
                return DecodePUSHImm32(instruction);

            case 0x6A: // PUSH imm8
                return DecodePUSHImm8(instruction);

            case 0x90: // NOP
                // 0x90 encodes XCHG EAX, EAX, which is the one-byte NOP; it is shown
                // without operands rather than as an exchange of eax with itself.
                instruction.Mnemonic = "nop";
                instruction.Operands = string.Empty;
                return true;

            case >= 0x91 and <= 0x97: // XCHG EAX, r32
                return DecodeXCHGEAXReg(opcode, instruction);

            default:
                return false;
        }
    }

    /// <summary>
    /// Decodes a MOV instruction with register and register/memory operands (opcodes 88-8B)
    /// </summary>
    private bool DecodeMOVRegMem(byte opcode, Instruction instruction)
    {
        int position = Decoder.GetPosition();
        if (position >= Length)
        {
            return false;
        }

        // Read the ModR/M byte
        var (mod, reg, rm, memOperand) = ModRMDecoder.ReadModRM();

        // Direction bit (bit 1): clear for 88/89 (MOV r/m, r - reg field is the source),
        // set for 8A/8B (MOV r, r/m - reg field is the destination)
        bool regIsDestination = (opcode & 0x02) != 0;

        // Size bit (bit 0): 0 = 8-bit operands, 1 = 32-bit operands
        int operandBits = (opcode & 0x01) != 0 ? 32 : 8;

        string regName = ModRMDecoder.GetRegisterName(reg, operandBits);

        // For mod == 3 the r/m field names a register of the same size, otherwise memory
        string rmOperand = mod == 3
            ? ModRMDecoder.GetRegisterName(rm, operandBits)
            : memOperand;

        // Operands are written destination first
        instruction.Operands = regIsDestination
            ? $"{regName}, {rmOperand}"
            : $"{rmOperand}, {regName}";
        return true;
    }

    /// <summary>
    /// Decodes a MOV instruction with 8-bit register and immediate operand
    /// </summary>
    private bool DecodeMOVRegImm8(byte opcode, Instruction instruction)
    {
        int position = Decoder.GetPosition();
        if (position >= Length)
        {
            return false;
        }

        // Register is encoded in the low 3 bits of the opcode
        string regName = ModRMDecoder.GetRegisterName(opcode & 0x07, 8);

        // Read the immediate value
        byte imm8 = CodeBuffer[position];
        Decoder.SetPosition(position + 1);

        instruction.Operands = $"{regName}, 0x{imm8:X2}";
        return true;
    }

    /// <summary>
    /// Decodes a MOV instruction with 32-bit register and immediate operand
    /// </summary>
    private bool DecodeMOVRegImm32(byte opcode, Instruction instruction)
    {
        int position = Decoder.GetPosition();
        if (position + 4 > Length)
        {
            return false;
        }

        // Register is encoded in the low 3 bits of the opcode
        string regName = ModRMDecoder.GetRegisterName(opcode & 0x07, 32);

        // Read the immediate value
        uint imm32 = BitConverter.ToUInt32(CodeBuffer, position);
        Decoder.SetPosition(position + 4);

        instruction.Operands = $"{regName}, 0x{imm32:X8}";
        return true;
    }

    /// <summary>
    /// Decodes a MOV between the accumulator (AL/EAX) and a direct 32-bit memory offset
    /// (A0/A1 load from memory, A2/A3 store to memory)
    /// </summary>
    private bool DecodeMOVMoffs(byte opcode, Instruction instruction)
    {
        int position = Decoder.GetPosition();
        if (position + 4 > Length)
        {
            return false;
        }

        // Bit 0 selects the accumulator size (A0/A2 = al, A1/A3 = eax)
        string regName = (opcode & 0x01) != 0 ? "eax" : "al";

        // Read the memory offset
        uint offset = BitConverter.ToUInt32(CodeBuffer, position);
        Decoder.SetPosition(position + 4);

        // Bit 1 selects the direction (A2/A3 write the accumulator to memory)
        bool storeToMemory = (opcode & 0x02) != 0;
        instruction.Operands = storeToMemory
            ? $"[0x{offset:X8}], {regName}"
            : $"{regName}, [0x{offset:X8}]";
        return true;
    }

    /// <summary>
    /// Decodes a PUSH or POP instruction whose 32-bit register is encoded in the opcode
    /// </summary>
    private bool DecodeOpcodeReg32(byte opcode, Instruction instruction)
    {
        // Register is encoded in the low 3 bits of the opcode
        instruction.Operands = ModRMDecoder.GetRegisterName(opcode & 0x07, 32);
        return true;
    }

    /// <summary>
    /// Decodes a PUSH instruction with 32-bit immediate operand
    /// </summary>
    private bool DecodePUSHImm32(Instruction instruction)
    {
        int position = Decoder.GetPosition();
        if (position + 4 > Length)
        {
            return false;
        }

        // Read the immediate value
        uint imm32 = BitConverter.ToUInt32(CodeBuffer, position);
        Decoder.SetPosition(position + 4);

        instruction.Operands = $"0x{imm32:X8}";
        return true;
    }

    /// <summary>
    /// Decodes a PUSH instruction with 8-bit immediate operand
    /// </summary>
    private bool DecodePUSHImm8(Instruction instruction)
    {
        int position = Decoder.GetPosition();
        if (position >= Length)
        {
            return false;
        }

        // Read the immediate value
        byte imm8 = CodeBuffer[position];
        Decoder.SetPosition(position + 1);

        instruction.Operands = $"0x{imm8:X2}";
        return true;
    }

    /// <summary>
    /// Decodes an XCHG instruction with EAX and register operands (opcodes 91-97)
    /// </summary>
    private bool DecodeXCHGEAXReg(byte opcode, Instruction instruction)
    {
        // Register is encoded in the low 3 bits of the opcode
        string regName = ModRMDecoder.GetRegisterName(opcode & 0x07, 32);
        instruction.Operands = $"eax, {regName}";
        return true;
    }
}

View File

@ -0,0 +1,167 @@
namespace X86Disassembler.X86.Handlers;
/// <summary>
/// Handler for x87 floating-point instructions (opcodes D8 through DF)
/// </summary>
public class FloatingPointHandler : InstructionHandler
{
    // Placeholder for opcode/reg combinations that have no mnemonic in the table
    private const string Unknown = "??";

    // Mnemonic lookup: first index is (opcode - 0xD8), second index is the ModR/M reg field
    private static readonly string[][] FpuMnemonics =
    {
        // D8 - operations on float32
        new[] { "fadd", "fmul", "fcom", "fcomp", "fsub", "fsubr", "fdiv", "fdivr" },
        // D9 - load, store, and control operations
        new[] { "fld", Unknown, "fst", "fstp", "fldenv", "fldcw", "fnstenv", "fnstcw" },
        // DA - operations on int32
        new[] { "fiadd", "fimul", "ficom", "ficomp", "fisub", "fisubr", "fidiv", "fidivr" },
        // DB - load/store int32, misc
        new[] { "fild", Unknown, "fist", "fistp", Unknown, "fld", Unknown, "fstp" },
        // DC - operations on float64
        new[] { "fadd", "fmul", "fcom", "fcomp", "fsub", "fsubr", "fdiv", "fdivr" },
        // DD - load/store float64
        new[] { "fld", Unknown, "fst", "fstp", "frstor", Unknown, "fnsave", "fnstsw" },
        // DE - operations on int16
        new[] { "fiadd", "fimul", "ficom", "ficomp", "fisub", "fisubr", "fidiv", "fidivr" },
        // DF - load/store int16, misc
        new[] { "fild", Unknown, "fist", "fistp", "fbld", "fild", "fbstp", "fistp" },
    };

    /// <summary>
    /// Creates a floating-point handler bound to the given code buffer and decoder
    /// </summary>
    /// <param name="codeBuffer">The buffer containing the code to decode</param>
    /// <param name="decoder">The instruction decoder that owns this handler</param>
    /// <param name="length">The length of the buffer</param>
    public FloatingPointHandler(byte[] codeBuffer, InstructionDecoder decoder, int length)
        : base(codeBuffer, decoder, length)
    {
    }

    /// <summary>
    /// Reports whether the opcode lies in the D8-DF range served by this handler
    /// </summary>
    /// <param name="opcode">The opcode to check</param>
    /// <returns>True if this handler can decode the opcode</returns>
    public override bool CanHandle(byte opcode)
    {
        return opcode is >= 0xD8 and <= 0xDF;
    }

    /// <summary>
    /// Decodes a floating-point instruction: the mnemonic is looked up from the opcode and
    /// the ModR/M reg field, the operand is either the memory operand or st(rm)
    /// </summary>
    /// <param name="opcode">The opcode of the instruction</param>
    /// <param name="instruction">The instruction object to populate</param>
    /// <returns>True if the instruction was decoded; false if no ModR/M byte is available</returns>
    public override bool Decode(byte opcode, Instruction instruction)
    {
        if (Decoder.GetPosition() >= Length)
        {
            return false;
        }

        // Row of the mnemonic table (0-7 for D8-DF)
        int tableRow = opcode - 0xD8;

        // The flag is set only for opcode DF (original note: "DF uses 64-bit operands").
        // NOTE(review): the flag's exact meaning is defined by ModRMDecoder.ReadModRM - confirm
        // it is appropriate for every DF reg value.
        var (mod, reg, rm, operand) = ModRMDecoder.ReadModRM(tableRow == 7);

        instruction.Mnemonic = FpuMnemonics[tableRow][reg];

        // mod == 3 selects a stack register; this is a simplified rendering and may need
        // to be expanded for register-only forms. Any other mod value is a memory operand.
        instruction.Operands = mod == 3 ? $"st({rm})" : operand;
        return true;
    }
}

View File

@ -0,0 +1,104 @@
namespace X86Disassembler.X86.Handlers;
/// <summary>
/// Handler for Group 1 instructions (ADD, OR, ADC, SBB, AND, SUB, XOR, CMP)
/// with an immediate source operand (opcodes 80, 81 and 83)
/// </summary>
public class Group1Handler : InstructionHandler
{
    /// <summary>
    /// Initializes a new instance of the Group1Handler class
    /// </summary>
    /// <param name="codeBuffer">The buffer containing the code to decode</param>
    /// <param name="decoder">The instruction decoder that owns this handler</param>
    /// <param name="length">The length of the buffer</param>
    public Group1Handler(byte[] codeBuffer, InstructionDecoder decoder, int length)
        : base(codeBuffer, decoder, length)
    {
    }

    /// <summary>
    /// Checks if this handler can decode the given opcode
    /// </summary>
    /// <param name="opcode">The opcode to check</param>
    /// <returns>True if this handler can decode the opcode</returns>
    public override bool CanHandle(byte opcode)
    {
        return opcode == 0x80 || opcode == 0x81 || opcode == 0x83;
    }

    /// <summary>
    /// Decodes a Group 1 instruction
    /// </summary>
    /// <param name="opcode">The opcode of the instruction</param>
    /// <param name="instruction">The instruction object to populate</param>
    /// <returns>True if the instruction was decoded (a truncated immediate is rendered as
    /// "???"); false if the opcode is unsupported or no ModR/M byte is available</returns>
    public override bool Decode(byte opcode, Instruction instruction)
    {
        // Reject unsupported opcodes before consuming any bytes
        if (!CanHandle(opcode))
        {
            return false;
        }

        if (Decoder.GetPosition() >= Length)
        {
            return false;
        }

        // Read the ModR/M byte
        var (mod, reg, rm, destOperand) = ModRMDecoder.ReadModRM();

        // Determine the operation based on reg field
        instruction.Mnemonic = OpcodeMap.Group1Operations[reg];

        // Opcode 80 works on 8-bit operands, 81 and 83 on 32-bit operands.
        // NOTE(review): ReadModRM() is called without size information, so for a register
        // destination the name is resolved here at the correct width, the same way
        // DataTransferHandler treats mod == 3.
        if (mod == 3)
        {
            destOperand = ModRMDecoder.GetRegisterName(rm, opcode == 0x80 ? 8 : 32);
        }

        // Set the operands
        instruction.Operands = $"{destOperand}, {ReadImmediate(opcode)}";
        return true;
    }

    /// <summary>
    /// Reads the immediate operand that follows the ModR/M bytes and advances the decoder
    /// </summary>
    /// <param name="opcode">The Group 1 opcode (80, 81 or 83) selecting the immediate form</param>
    /// <returns>The formatted immediate, or "???" if it runs past the end of the buffer</returns>
    private string ReadImmediate(byte opcode)
    {
        int position = Decoder.GetPosition();

        if (opcode == 0x81) // 32-bit immediate
        {
            if (position + 4 > Length)
            {
                return "???";
            }

            uint imm32 = BitConverter.ToUInt32(CodeBuffer, position);
            Decoder.SetPosition(position + 4);
            return $"0x{imm32:X8}";
        }

        // Opcodes 80 and 83 both carry a single immediate byte
        if (position >= Length)
        {
            return "???";
        }

        byte imm8 = CodeBuffer[position];
        Decoder.SetPosition(position + 1);

        if (opcode == 0x83)
        {
            // The byte is sign-extended to the 32-bit operand size, so show the value that
            // is actually applied (e.g. F0 becomes 0xFFFFFFF0), in the same form as opcode 81
            uint extended = unchecked((uint)(sbyte)imm8);
            return $"0x{extended:X8}";
        }

        return $"0x{imm8:X2}";
    }
}

View File

@ -0,0 +1,48 @@
namespace X86Disassembler.X86.Handlers;
/// <summary>
/// Common base for instruction handlers: holds the shared decoding state and defines
/// the CanHandle/Decode contract every handler implements
/// </summary>
public abstract class InstructionHandler
{
    /// <summary>The buffer containing the code to decode</summary>
    protected readonly byte[] CodeBuffer;

    /// <summary>Length of the buffer, as passed to the constructor</summary>
    protected readonly int Length;

    /// <summary>The instruction decoder that owns this handler</summary>
    protected readonly InstructionDecoder Decoder;

    /// <summary>ModRM decoder for handling addressing modes, created per handler</summary>
    protected readonly ModRMDecoder ModRMDecoder;

    /// <summary>
    /// Stores the shared decoding state and creates the ModRM decoder from the same arguments
    /// </summary>
    /// <param name="codeBuffer">The buffer containing the code to decode</param>
    /// <param name="decoder">The instruction decoder that owns this handler</param>
    /// <param name="length">The length of the buffer</param>
    protected InstructionHandler(byte[] codeBuffer, InstructionDecoder decoder, int length)
    {
        ModRMDecoder = new ModRMDecoder(codeBuffer, decoder, length);
        Length = length;
        Decoder = decoder;
        CodeBuffer = codeBuffer;
    }

    /// <summary>
    /// Tells whether this handler is able to decode the given opcode
    /// </summary>
    /// <param name="opcode">The opcode to check</param>
    /// <returns>True if this handler can decode the opcode</returns>
    public abstract bool CanHandle(byte opcode);

    /// <summary>
    /// Decodes the instruction identified by the opcode into the supplied instruction object
    /// </summary>
    /// <param name="opcode">The opcode of the instruction</param>
    /// <param name="instruction">The instruction object to populate</param>
    /// <returns>True if the instruction was successfully decoded</returns>
    public abstract bool Decode(byte opcode, Instruction instruction);
}

View File

@ -1,11 +1,13 @@
namespace X86Disassembler.X86;
using X86Disassembler.X86.Handlers;
/// <summary>
/// Decoder for x86 instructions
/// Decodes x86 instructions
/// </summary>
public class InstructionDecoder
{
// Instruction prefixes
// Instruction prefix bytes
private const byte PREFIX_LOCK = 0xF0;
private const byte PREFIX_REPNE = 0xF2;
private const byte PREFIX_REP = 0xF3;
@ -18,44 +20,6 @@ public class InstructionDecoder
private const byte PREFIX_OPERAND_SIZE = 0x66;
private const byte PREFIX_ADDRESS_SIZE = 0x67;
// Common opcodes
private const byte OPCODE_INT3 = 0xCC;
private const byte OPCODE_NOP = 0x90;
private const byte OPCODE_RET = 0xC3;
private const byte OPCODE_CALL_NEAR_RELATIVE = 0xE8;
private const byte OPCODE_JMP_NEAR_RELATIVE = 0xE9;
private const byte OPCODE_JMP_SHORT_RELATIVE = 0xEB;
// Opcode groups
private const byte OPCODE_GROUP_1_BYTE = 0x80;
private const byte OPCODE_GROUP_1_WORD_DWORD = 0x81;
private const byte OPCODE_GROUP_1_BYTE_IMM8 = 0x83;
// ModR/M byte masks
private const byte MODRM_MOD_MASK = 0xC0; // 11000000b
private const byte MODRM_REG_MASK = 0x38; // 00111000b
private const byte MODRM_RM_MASK = 0x07; // 00000111b
// SIB byte masks
private const byte SIB_SCALE_MASK = 0xC0; // 11000000b
private const byte SIB_INDEX_MASK = 0x38; // 00111000b
private const byte SIB_BASE_MASK = 0x07; // 00000111b
// Register names
private static readonly string[] RegisterNames8 = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh" };
private static readonly string[] RegisterNames16 = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di" };
private static readonly string[] RegisterNames32 = { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi" };
private static readonly string[] SegmentRegisterNames = { "es", "cs", "ss", "ds", "fs", "gs" };
// Condition codes for conditional jumps
private static readonly string[] ConditionCodes = {
"o", "no", "b", "ae", "e", "ne", "be", "a",
"s", "ns", "p", "np", "l", "ge", "le", "g"
};
// One-byte opcode map
private static readonly string[] OneByteOpcodes = new string[256];
// Buffer containing the code to decode
private readonly byte[] _codeBuffer;
@ -65,178 +29,8 @@ public class InstructionDecoder
// Length of the buffer
private readonly int _length;
/// <summary>
/// Static constructor to initialize the opcode maps
/// </summary>
static InstructionDecoder()
{
InitializeOpcodeMaps();
}
/// <summary>
/// Initializes the opcode maps
/// </summary>
private static void InitializeOpcodeMaps()
{
// Initialize all entries to "??" (unknown)
for (int i = 0; i < 256; i++)
{
OneByteOpcodes[i] = "??";
}
// Floating-point instructions
OneByteOpcodes[0xD8] = "fadd"; // Various FP instructions based on ModR/M
OneByteOpcodes[0xD9] = "fld"; // Various FP instructions based on ModR/M
OneByteOpcodes[0xDA] = "fiadd"; // Various FP instructions based on ModR/M
OneByteOpcodes[0xDB] = "fild"; // Various FP instructions based on ModR/M
OneByteOpcodes[0xDC] = "fadd"; // Various FP instructions based on ModR/M
OneByteOpcodes[0xDD] = "fld"; // Various FP instructions based on ModR/M
OneByteOpcodes[0xDE] = "fiadd"; // Various FP instructions based on ModR/M
OneByteOpcodes[0xDF] = "fistp"; // Various FP instructions based on ModR/M
// Group 1 instructions (ADD, OR, ADC, SBB, AND, SUB, XOR, CMP)
OneByteOpcodes[0x80] = "group1b";
OneByteOpcodes[0x81] = "group1d";
OneByteOpcodes[0x83] = "group1s"; // Sign-extended immediate
// Data transfer instructions
for (int i = 0x88; i <= 0x8B; i++)
{
OneByteOpcodes[i] = "mov";
}
OneByteOpcodes[0xA0] = "mov"; // MOV AL, moffs8
OneByteOpcodes[0xA1] = "mov"; // MOV EAX, moffs32
OneByteOpcodes[0xA2] = "mov"; // MOV moffs8, AL
OneByteOpcodes[0xA3] = "mov"; // MOV moffs32, EAX
for (int i = 0xB0; i <= 0xB7; i++)
{
OneByteOpcodes[i] = "mov"; // MOV r8, imm8
}
for (int i = 0xB8; i <= 0xBF; i++)
{
OneByteOpcodes[i] = "mov"; // MOV r32, imm32
}
OneByteOpcodes[0xC6] = "mov"; // MOV r/m8, imm8
OneByteOpcodes[0xC7] = "mov"; // MOV r/m32, imm32
// Push/Pop instructions
for (int i = 0x50; i <= 0x57; i++)
{
OneByteOpcodes[i] = "push"; // PUSH r32
}
for (int i = 0x58; i <= 0x5F; i++)
{
OneByteOpcodes[i] = "pop"; // POP r32
}
OneByteOpcodes[0x68] = "push"; // PUSH imm32
OneByteOpcodes[0x6A] = "push"; // PUSH imm8
OneByteOpcodes[0x8F] = "pop"; // POP r/m32
OneByteOpcodes[0x9C] = "pushf"; // PUSHF
OneByteOpcodes[0x9D] = "popf"; // POPF
// Arithmetic instructions
for (int i = 0x00; i <= 0x05; i++)
{
OneByteOpcodes[i] = "add";
}
for (int i = 0x28; i <= 0x2D; i++)
{
OneByteOpcodes[i] = "sub";
}
for (int i = 0x30; i <= 0x35; i++)
{
OneByteOpcodes[i] = "xor";
}
for (int i = 0x38; i <= 0x3D; i++)
{
OneByteOpcodes[i] = "cmp";
}
OneByteOpcodes[0x40] = "inc"; // INC eax
OneByteOpcodes[0x41] = "inc"; // INC ecx
OneByteOpcodes[0x42] = "inc"; // INC edx
OneByteOpcodes[0x43] = "inc"; // INC ebx
OneByteOpcodes[0x44] = "inc"; // INC esp
OneByteOpcodes[0x45] = "inc"; // INC ebp
OneByteOpcodes[0x46] = "inc"; // INC esi
OneByteOpcodes[0x47] = "inc"; // INC edi
OneByteOpcodes[0x48] = "dec"; // DEC eax
OneByteOpcodes[0x49] = "dec"; // DEC ecx
OneByteOpcodes[0x4A] = "dec"; // DEC edx
OneByteOpcodes[0x4B] = "dec"; // DEC ebx
OneByteOpcodes[0x4C] = "dec"; // DEC esp
OneByteOpcodes[0x4D] = "dec"; // DEC ebp
OneByteOpcodes[0x4E] = "dec"; // DEC esi
OneByteOpcodes[0x4F] = "dec"; // DEC edi
// Logical instructions
for (int i = 0x20; i <= 0x25; i++)
{
OneByteOpcodes[i] = "and";
}
for (int i = 0x08; i <= 0x0D; i++)
{
OneByteOpcodes[i] = "or";
}
OneByteOpcodes[0xF7] = "not"; // Group 3 - NOT, NEG, MUL, IMUL, DIV, IDIV
// Shift and rotate instructions
OneByteOpcodes[0xD0] = "rol"; // Group 2 - ROL, ROR, RCL, RCR, SHL/SAL, SHR, SAR
OneByteOpcodes[0xD1] = "rol"; // Group 2 - ROL, ROR, RCL, RCR, SHL/SAL, SHR, SAR
OneByteOpcodes[0xD2] = "rol"; // Group 2 - ROL, ROR, RCL, RCR, SHL/SAL, SHR, SAR
OneByteOpcodes[0xD3] = "rol"; // Group 2 - ROL, ROR, RCL, RCR, SHL/SAL, SHR, SAR
// Control flow instructions
OneByteOpcodes[0xC3] = "ret";
OneByteOpcodes[0xC2] = "ret";
OneByteOpcodes[0xCA] = "retf";
OneByteOpcodes[0xCB] = "retf";
OneByteOpcodes[0xCC] = "int3";
OneByteOpcodes[0xCD] = "int";
OneByteOpcodes[0xCE] = "into";
OneByteOpcodes[0xCF] = "iret";
OneByteOpcodes[0xE8] = "call";
OneByteOpcodes[0xE9] = "jmp";
OneByteOpcodes[0xEB] = "jmp";
OneByteOpcodes[0xFF] = "call"; // Group 5 - CALL, JMP, PUSH
// Conditional jumps
for (int i = 0x70; i <= 0x7F; i++)
{
OneByteOpcodes[i] = "j" + ConditionCodes[i - 0x70];
}
// String instructions
OneByteOpcodes[0xA4] = "movsb";
OneByteOpcodes[0xA5] = "movsd";
OneByteOpcodes[0xA6] = "cmpsb";
OneByteOpcodes[0xA7] = "cmpsd";
OneByteOpcodes[0xAA] = "stosb";
OneByteOpcodes[0xAB] = "stosd";
OneByteOpcodes[0xAC] = "lodsb";
OneByteOpcodes[0xAD] = "lodsd";
OneByteOpcodes[0xAE] = "scasb";
OneByteOpcodes[0xAF] = "scasd";
// Misc instructions
OneByteOpcodes[0x90] = "nop";
OneByteOpcodes[0x91] = "xchg"; // XCHG eax, ecx
OneByteOpcodes[0x92] = "xchg"; // XCHG eax, edx
OneByteOpcodes[0x93] = "xchg"; // XCHG eax, ebx
OneByteOpcodes[0x94] = "xchg"; // XCHG eax, esp
OneByteOpcodes[0x95] = "xchg"; // XCHG eax, ebp
OneByteOpcodes[0x96] = "xchg"; // XCHG eax, esi
OneByteOpcodes[0x97] = "xchg"; // XCHG eax, edi
OneByteOpcodes[0x98] = "cwde";
OneByteOpcodes[0x99] = "cdq";
OneByteOpcodes[0xF4] = "hlt";
OneByteOpcodes[0xF5] = "cmc";
OneByteOpcodes[0xF8] = "clc";
OneByteOpcodes[0xF9] = "stc";
OneByteOpcodes[0xFA] = "cli";
OneByteOpcodes[0xFB] = "sti";
OneByteOpcodes[0xFC] = "cld";
OneByteOpcodes[0xFD] = "std";
}
// List of instruction handlers
private readonly List<InstructionHandler> _handlers;
/// <summary>
/// Initializes a new instance of the InstructionDecoder class
@ -247,18 +41,15 @@ public class InstructionDecoder
_codeBuffer = codeBuffer;
_position = 0;
_length = codeBuffer.Length;
}
/// <summary>
/// Decodes an instruction at the specified position in the code buffer
/// </summary>
/// <param name="position">The position in the code buffer</param>
/// <param name="instruction">The instruction object to populate</param>
/// <returns>The number of bytes read</returns>
public int DecodeAt(int position, Instruction instruction)
{
_position = position;
return Decode(instruction);
// Initialize the instruction handlers
_handlers = new List<InstructionHandler>
{
new Group1Handler(_codeBuffer, this, _length),
new FloatingPointHandler(_codeBuffer, this, _length),
new DataTransferHandler(_codeBuffer, this, _length),
new ControlFlowHandler(_codeBuffer, this, _length)
};
}
/// <summary>
@ -350,238 +141,27 @@ public class InstructionDecoder
// Read the opcode
byte opcode = _codeBuffer[_position++];
// Get the mnemonic from the opcode map
string mnemonic = OneByteOpcodes[opcode];
// Handle specific opcodes
string operands = string.Empty;
switch (opcode)
// Try to find a handler for this opcode
bool handled = false;
foreach (var handler in _handlers)
{
case 0xDF: // FISTP and other FPU instructions
if (_position < _length)
if (handler.CanHandle(opcode))
{
handled = handler.Decode(opcode, instruction);
if (handled)
{
byte modRM = _codeBuffer[_position++];
byte mod = (byte)((modRM & MODRM_MOD_MASK) >> 6);
byte reg = (byte)((modRM & MODRM_REG_MASK) >> 3);
byte rm = (byte)(modRM & MODRM_RM_MASK);
// FISTP with memory operand
if (reg == 7) // FISTP
{
if (mod == 0 && rm == 5) // Displacement only addressing
{
if (_position + 4 <= _length)
{
uint disp32 = BitConverter.ToUInt32(_codeBuffer, _position);
_position += 4;
operands = $"qword ptr [0x{disp32:X8}]";
}
}
else
{
// Handle other addressing modes if needed
operands = DecodeModRM(mod, rm, true);
}
}
break;
}
break;
case 0xA1: // MOV EAX, memory
if (_position + 4 <= _length)
{
uint addr = BitConverter.ToUInt32(_codeBuffer, _position);
_position += 4;
operands = $"eax, [0x{addr:X8}]";
}
break;
case OPCODE_INT3:
// No operands for INT3
break;
case OPCODE_NOP:
// No operands for NOP
break;
case OPCODE_RET:
// No operands for RET
break;
case OPCODE_CALL_NEAR_RELATIVE:
if (_position + 4 <= _length)
{
// Read 32-bit relative offset
int offset = BitConverter.ToInt32(_codeBuffer, _position);
_position += 4;
// Calculate target address (relative to next instruction)
uint targetAddress = (uint)(_position + offset);
operands = $"0x{targetAddress:X8}";
}
break;
case OPCODE_JMP_NEAR_RELATIVE:
if (_position + 4 <= _length)
{
// Read 32-bit relative offset
int offset = BitConverter.ToInt32(_codeBuffer, _position);
_position += 4;
// Calculate target address (relative to next instruction)
uint targetAddress = (uint)(_position + offset);
operands = $"0x{targetAddress:X8}";
}
break;
case OPCODE_JMP_SHORT_RELATIVE:
if (_position < _length)
{
// Read 8-bit relative offset
sbyte offset = (sbyte)_codeBuffer[_position++];
// Calculate target address (relative to next instruction)
uint targetAddress = (uint)(_position + offset);
operands = $"0x{targetAddress:X8}";
}
break;
case 0x83: // Group 1 with sign-extended immediate byte
if (_position < _length)
{
byte modRM = _codeBuffer[_position++];
byte mod = (byte)((modRM & MODRM_MOD_MASK) >> 6);
byte reg = (byte)((modRM & MODRM_REG_MASK) >> 3); // This is the operation type
byte rm = (byte)(modRM & MODRM_RM_MASK);
// Determine the operation based on reg field
string[] group1Ops = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
mnemonic = group1Ops[reg];
// Decode the destination operand
string destOperand;
if (mod == 3) // Register operand
{
destOperand = RegisterNames32[rm];
}
else // Memory operand
{
destOperand = DecodeModRM(mod, rm, false);
}
// Read the immediate byte
if (_position < _length)
{
sbyte imm8 = (sbyte)_codeBuffer[_position++];
operands = $"{destOperand}, 0x{imm8:X2}";
}
else
{
operands = $"{destOperand}, ???";
}
}
break;
default:
// Handle register-based instructions
if (opcode >= 0x40 && opcode <= 0x47) // INC r32
{
int reg = opcode - 0x40;
operands = RegisterNames32[reg];
}
else if (opcode >= 0x48 && opcode <= 0x4F) // DEC r32
{
int reg = opcode - 0x48;
operands = RegisterNames32[reg];
}
else if (opcode >= 0x50 && opcode <= 0x57) // PUSH r32
{
int reg = opcode - 0x50;
operands = RegisterNames32[reg];
}
else if (opcode >= 0x58 && opcode <= 0x5F) // POP r32
{
int reg = opcode - 0x58;
operands = RegisterNames32[reg];
}
else if (opcode >= 0x91 && opcode <= 0x97) // XCHG eax, r32
{
int reg = opcode - 0x90;
operands = $"eax, {RegisterNames32[reg]}";
}
else if (opcode >= 0xB0 && opcode <= 0xB7) // MOV r8, imm8
{
if (_position < _length)
{
int reg = opcode - 0xB0;
byte imm8 = _codeBuffer[_position++];
operands = $"{RegisterNames8[reg]}, 0x{imm8:X2}";
}
}
else if (opcode >= 0xB8 && opcode <= 0xBF) // MOV r32, imm32
{
if (_position + 4 <= _length)
{
int reg = opcode - 0xB8;
uint imm32 = BitConverter.ToUInt32(_codeBuffer, _position);
_position += 4;
operands = $"{RegisterNames32[reg]}, 0x{imm32:X8}";
}
}
else if (opcode >= 0x70 && opcode <= 0x7F) // Conditional jumps (short)
{
if (_position < _length)
{
sbyte offset = (sbyte)_codeBuffer[_position++];
uint targetAddress = (uint)(_position + offset);
operands = $"0x{targetAddress:X8}";
}
}
else if (opcode == 0x68) // PUSH imm32
{
if (_position + 4 <= _length)
{
uint imm32 = BitConverter.ToUInt32(_codeBuffer, _position);
_position += 4;
operands = $"0x{imm32:X8}";
}
}
else if (opcode == 0x6A) // PUSH imm8
{
if (_position < _length)
{
byte imm8 = _codeBuffer[_position++];
operands = $"0x{imm8:X2}";
}
}
else if (opcode == 0xCD) // INT imm8
{
if (_position < _length)
{
byte imm8 = _codeBuffer[_position++];
operands = $"0x{imm8:X2}";
}
}
else if (opcode == 0xE3) // JECXZ rel8
{
if (_position < _length)
{
sbyte offset = (sbyte)_codeBuffer[_position++];
uint targetAddress = (uint)(_position + offset);
operands = $"0x{targetAddress:X8}";
}
}
else
{
// For other opcodes, we'll just show the raw bytes for now
// In a full implementation, we would decode the ModR/M byte, SIB byte, etc.
}
break;
}
}
// Set the instruction properties
instruction.Mnemonic = mnemonic;
instruction.Operands = operands;
// If no handler was found or the instruction couldn't be decoded,
// use a default mnemonic from the opcode map
if (!handled)
{
instruction.Mnemonic = OpcodeMap.GetMnemonic(opcode);
instruction.Operands = string.Empty;
}
// Copy the instruction bytes
int bytesRead = _position - startPosition;
@ -592,100 +172,32 @@ public class InstructionDecoder
}
/// <summary>
/// Decodes a ModR/M byte to get the operand string
/// Sets the current position in the code buffer
/// </summary>
/// <param name="mod">The mod field (2 bits)</param>
/// <param name="rm">The r/m field (3 bits)</param>
/// <param name="is64Bit">True if the operand is 64-bit</param>
/// <returns>The operand string</returns>
private string DecodeModRM(byte mod, byte rm, bool is64Bit)
/// <param name="position">The new position</param>
public void SetPosition(int position)
{
string sizePrefix = is64Bit ? "qword" : "dword";
_position = position;
}
switch (mod)
{
case 0: // [reg] or disp32
if (rm == 5) // disp32
{
if (_position + 4 <= _length)
{
uint disp32 = BitConverter.ToUInt32(_codeBuffer, _position);
_position += 4;
return $"{sizePrefix} ptr [0x{disp32:X8}]";
}
return $"{sizePrefix} ptr [???]";
}
else if (rm == 4) // SIB
{
// Handle SIB byte
if (_position < _length)
{
byte sib = _codeBuffer[_position++];
// Decode SIB byte (not implemented yet)
return $"{sizePrefix} ptr [SIB]";
}
return $"{sizePrefix} ptr [???]";
}
else
{
return $"{sizePrefix} ptr [{RegisterNames32[rm]}]";
}
/// <summary>
/// Reports the offset in the code buffer that the decoder will read from next
/// </summary>
/// <returns>The current value of the decoder's position</returns>
public int GetPosition() => _position;
case 1: // [reg + disp8]
if (rm == 4) // SIB + disp8
{
// Handle SIB byte
if (_position + 1 < _length)
{
byte sib = _codeBuffer[_position++];
sbyte disp8 = (sbyte)_codeBuffer[_position++];
// Decode SIB byte (not implemented yet)
return $"{sizePrefix} ptr [SIB+0x{disp8:X2}]";
}
return $"{sizePrefix} ptr [???]";
}
else
{
if (_position < _length)
{
sbyte disp8 = (sbyte)_codeBuffer[_position++];
string dispStr = disp8 < 0 ? $"-0x{-disp8:X2}" : $"+0x{disp8:X2}";
return $"{sizePrefix} ptr [{RegisterNames32[rm]}{dispStr}]";
}
return $"{sizePrefix} ptr [{RegisterNames32[rm]}+???]";
}
case 2: // [reg + disp32]
if (rm == 4) // SIB + disp32
{
// Handle SIB byte
if (_position + 4 < _length)
{
byte sib = _codeBuffer[_position++];
int disp32 = BitConverter.ToInt32(_codeBuffer, _position);
_position += 4;
// Decode SIB byte (not implemented yet)
return $"{sizePrefix} ptr [SIB+0x{disp32:X8}]";
}
return $"{sizePrefix} ptr [???]";
}
else
{
if (_position + 4 <= _length)
{
int disp32 = BitConverter.ToInt32(_codeBuffer, _position);
_position += 4;
string dispStr = disp32 < 0 ? $"-0x{-disp32:X8}" : $"+0x{disp32:X8}";
return $"{sizePrefix} ptr [{RegisterNames32[rm]}{dispStr}]";
}
return $"{sizePrefix} ptr [{RegisterNames32[rm]}+???]";
}
case 3: // reg
return is64Bit ? "mm" + rm : RegisterNames32[rm];
default:
return "???";
}
/// <summary>
/// Moves the decoder to the given offset in the code buffer and decodes from there
/// </summary>
/// <param name="position">The offset in the code buffer to start decoding at</param>
/// <param name="instruction">The instruction object that Decode populates</param>
/// <returns>The number of bytes read, as reported by Decode</returns>
public int DecodeAt(int position, Instruction instruction)
{
    // Reposition first; Decode always starts at the current position.
    _position = position;

    int bytesRead = Decode(instruction);
    return bytesRead;
}
}

View File

@ -0,0 +1,242 @@
namespace X86Disassembler.X86;
/// <summary>
/// Handles decoding of ModR/M bytes in x86 instructions
/// </summary>
/// <remarks>
/// The decoder does not keep its own cursor: it reads the current offset from the owning
/// <see cref="InstructionDecoder"/> via GetPosition and advances it via SetPosition after
/// consuming SIB and displacement bytes.
/// </remarks>
public class ModRMDecoder
{
    // ModR/M byte masks
    private const byte MOD_MASK = 0xC0; // 11000000b
    private const byte REG_MASK = 0x38; // 00111000b
    private const byte RM_MASK = 0x07;  // 00000111b

    // SIB byte masks
    private const byte SIB_SCALE_MASK = 0xC0; // 11000000b
    private const byte SIB_INDEX_MASK = 0x38; // 00111000b
    private const byte SIB_BASE_MASK = 0x07;  // 00000111b

    // r/m and SIB field values with a special meaning in 32-bit addressing
    private const byte RM_SIB_FOLLOWS = 4;  // r/m == 100b: a SIB byte follows the ModR/M byte
    private const byte RM_DISP32_ONLY = 5;  // r/m == 101b with mod == 00b: disp32, no base register
    private const byte SIB_NO_INDEX = 4;    // index == 100b: no index register
    private const byte SIB_NO_BASE = 5;     // base == 101b with mod == 00b: disp32 replaces the base

    // Register names
    private static readonly string[] RegisterNames8 = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh" };
    private static readonly string[] RegisterNames16 = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di" };
    private static readonly string[] RegisterNames32 = { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi" };

    // Buffer containing the code to decode
    private readonly byte[] _codeBuffer;

    // The instruction decoder that owns this ModRM decoder
    private readonly InstructionDecoder _decoder;

    // Length of the buffer
    private readonly int _length;

    /// <summary>
    /// Initializes a new instance of the ModRMDecoder class
    /// </summary>
    /// <param name="codeBuffer">The buffer containing the code to decode</param>
    /// <param name="decoder">The instruction decoder that owns this ModRM decoder</param>
    /// <param name="length">The length of the buffer</param>
    public ModRMDecoder(byte[] codeBuffer, InstructionDecoder decoder, int length)
    {
        _codeBuffer = codeBuffer;
        _decoder = decoder;
        _length = length;
    }

    /// <summary>
    /// Decodes the addressing form selected by already-extracted mod and r/m fields,
    /// consuming any SIB and displacement bytes that follow at the decoder's current position
    /// </summary>
    /// <param name="mod">The mod field (2 bits)</param>
    /// <param name="rm">The r/m field (3 bits)</param>
    /// <param name="is64Bit">True if the operand is 64-bit</param>
    /// <returns>
    /// The operand string. When the buffer ends before the required bytes, a string
    /// containing "???" is returned and the position is left unchanged.
    /// </returns>
    public string DecodeModRM(byte mod, byte rm, bool is64Bit)
    {
        string sizePrefix = is64Bit ? "qword" : "dword";
        int position = _decoder.GetPosition();

        switch (mod)
        {
            case 0: // [reg], [SIB] or [disp32]
                if (rm == RM_DISP32_ONLY)
                {
                    if (position + 4 <= _length)
                    {
                        uint disp32 = BitConverter.ToUInt32(_codeBuffer, position);
                        _decoder.SetPosition(position + 4);
                        return $"{sizePrefix} ptr [0x{disp32:X8}]";
                    }

                    return $"{sizePrefix} ptr [???]";
                }

                if (rm == RM_SIB_FOLLOWS)
                {
                    if (position < _length)
                    {
                        byte sib = _codeBuffer[position];
                        _decoder.SetPosition(position + 1);

                        // DecodeSIB reads the trailing disp32 itself when the SIB base is 101b.
                        return DecodeSIB(sib, mod, 0, is64Bit);
                    }

                    return $"{sizePrefix} ptr [???]";
                }

                return $"{sizePrefix} ptr [{RegisterNames32[rm]}]";

            case 1: // [reg + disp8]
                if (rm == RM_SIB_FOLLOWS)
                {
                    // Need two bytes: SIB + disp8
                    if (position + 1 < _length)
                    {
                        byte sib = _codeBuffer[position];
                        sbyte disp8 = (sbyte)_codeBuffer[position + 1];
                        _decoder.SetPosition(position + 2);
                        return DecodeSIB(sib, mod, disp8, is64Bit);
                    }

                    return $"{sizePrefix} ptr [???]";
                }

                if (position < _length)
                {
                    sbyte disp8 = (sbyte)_codeBuffer[position];
                    _decoder.SetPosition(position + 1);
                    return $"{sizePrefix} ptr [{RegisterNames32[rm]}{FormatSignedHex(disp8, 2)}]";
                }

                return $"{sizePrefix} ptr [{RegisterNames32[rm]}+???]";

            case 2: // [reg + disp32]
                if (rm == RM_SIB_FOLLOWS)
                {
                    // Need five bytes: SIB + disp32
                    if (position + 4 < _length)
                    {
                        byte sib = _codeBuffer[position];
                        int disp32 = BitConverter.ToInt32(_codeBuffer, position + 1);
                        _decoder.SetPosition(position + 5);
                        return DecodeSIB(sib, mod, disp32, is64Bit);
                    }

                    return $"{sizePrefix} ptr [???]";
                }

                if (position + 4 <= _length)
                {
                    int disp32 = BitConverter.ToInt32(_codeBuffer, position);
                    _decoder.SetPosition(position + 4);
                    return $"{sizePrefix} ptr [{RegisterNames32[rm]}{FormatSignedHex(disp32, 8)}]";
                }

                return $"{sizePrefix} ptr [{RegisterNames32[rm]}+???]";

            case 3: // reg
                return is64Bit ? "mm" + rm : RegisterNames32[rm];

            default:
                return "???";
        }
    }

    /// <summary>
    /// Reads the ModR/M byte at the decoder's current position, splits it into its fields
    /// and decodes the r/m operand
    /// </summary>
    /// <param name="is64Bit">True if the operand is 64-bit</param>
    /// <returns>
    /// A tuple containing the mod, reg, rm fields and the decoded operand string.
    /// When the position is already at or past the end of the buffer, (0, 0, 0, "???")
    /// is returned and the position is not advanced.
    /// </returns>
    public (byte mod, byte reg, byte rm, string operand) ReadModRM(bool is64Bit = false)
    {
        int position = _decoder.GetPosition();

        if (position >= _length)
        {
            return (0, 0, 0, "???");
        }

        byte modRM = _codeBuffer[position];
        _decoder.SetPosition(position + 1);

        byte mod = (byte)((modRM & MOD_MASK) >> 6);
        byte reg = (byte)((modRM & REG_MASK) >> 3);
        byte rm = (byte)(modRM & RM_MASK);

        string operand = DecodeModRM(mod, rm, is64Bit);

        return (mod, reg, rm, operand);
    }

    /// <summary>
    /// Decodes a SIB byte
    /// </summary>
    /// <param name="sib">The SIB byte</param>
    /// <param name="mod">
    /// The mod field of the ModR/M byte that introduced this SIB byte. It decides whether
    /// base == 101b means "ebp" (mod 1 or 2) or "no base register, disp32 follows" (mod 0).
    /// </param>
    /// <param name="displacement">
    /// The displacement already read by the caller (disp8 or disp32 for mod 1 or 2; 0 for mod 0)
    /// </param>
    /// <param name="is64Bit">True if the operand is 64-bit</param>
    /// <returns>The decoded SIB string</returns>
    private string DecodeSIB(byte sib, byte mod, int displacement, bool is64Bit)
    {
        string sizePrefix = is64Bit ? "qword" : "dword";

        byte scale = (byte)((sib & SIB_SCALE_MASK) >> 6);
        byte index = (byte)((sib & SIB_INDEX_MASK) >> 3);
        byte @base = (byte)(sib & SIB_BASE_MASK);

        bool hasIndex = index != SIB_NO_INDEX;

        // The "no base" form is selected by mod, not by the displacement value:
        // [ebp+0] encoded with mod == 1 still uses ebp as its base.
        bool hasBase = !(mod == 0 && @base == SIB_NO_BASE);

        int scaleFactor = 1 << scale; // 1, 2, 4, or 8
        string scaleStr = scaleFactor > 1 ? $"*{scaleFactor}" : "";

        if (!hasBase)
        {
            // A disp32 follows the SIB byte whether or not an index register is present.
            int position = _decoder.GetPosition();
            if (position + 4 > _length)
            {
                return $"{sizePrefix} ptr [???]";
            }

            uint disp32 = BitConverter.ToUInt32(_codeBuffer, position);
            _decoder.SetPosition(position + 4);

            return hasIndex
                ? $"{sizePrefix} ptr [{RegisterNames32[index]}{scaleStr}+0x{disp32:X8}]"
                : $"{sizePrefix} ptr [0x{disp32:X8}]";
        }

        // A zero displacement is omitted from the output.
        string dispStr = displacement != 0 ? FormatSignedHex(displacement, 1) : "";

        if (!hasIndex)
        {
            return $"{sizePrefix} ptr [{RegisterNames32[@base]}{dispStr}]";
        }

        return $"{sizePrefix} ptr [{RegisterNames32[@base]}+{RegisterNames32[index]}{scaleStr}{dispStr}]";
    }

    /// <summary>
    /// Formats a signed displacement as "+0x.." or "-0x.." with at least the given number of hex digits
    /// </summary>
    /// <param name="value">The signed displacement</param>
    /// <param name="minDigits">The minimum number of hex digits to emit</param>
    /// <returns>The sign-prefixed hexadecimal string</returns>
    private static string FormatSignedHex(int value, int minDigits)
    {
        // Widen before taking the magnitude so int.MinValue cannot overflow.
        long magnitude = Math.Abs((long)value);
        string sign = value < 0 ? "-0x" : "+0x";
        return sign + magnitude.ToString("X" + minDigits);
    }

    /// <summary>
    /// Gets the register name based on the register index and size
    /// </summary>
    /// <param name="index">The register index (0-7)</param>
    /// <param name="size">The register size in bits; any value other than 8 or 16 selects the 32-bit name</param>
    /// <returns>The register name</returns>
    public static string GetRegisterName(int index, int size)
    {
        return size switch
        {
            8 => RegisterNames8[index],
            16 => RegisterNames16[index],
            _ => RegisterNames32[index]
        };
    }
}

View File

@ -0,0 +1,137 @@
namespace X86Disassembler.X86;
/// <summary>
/// Provides mapping between opcodes and their mnemonics
/// </summary>
/// <remarks>
/// Opcodes without an entry map to "??". Group 1 opcodes (0x80, 0x81, 0x83) map to
/// placeholder names; the actual operation is selected through <see cref="Group1Operations"/>.
/// </remarks>
public class OpcodeMap
{
    // One-byte opcode map, indexed by opcode value
    private static readonly string[] OneByteOpcodes = new string[256];

    // Condition codes for conditional jumps, indexed by the low nibble of opcodes 0x70-0x7F
    private static readonly string[] ConditionCodes = {
        "o", "no", "b", "ae", "e", "ne", "be", "a",
        "s", "ns", "p", "np", "l", "ge", "le", "g"
    };

    // Group 1 operations (used with opcodes 0x80, 0x81, 0x83)
    public static readonly string[] Group1Operations = {
        "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp"
    };

    // Static constructor to initialize the opcode maps
    static OpcodeMap()
    {
        InitializeOpcodeMaps();
    }

    /// <summary>
    /// Initializes the opcode maps
    /// </summary>
    private static void InitializeOpcodeMaps()
    {
        // Initialize all entries to "??" (unknown)
        for (int i = 0; i < 256; i++)
        {
            OneByteOpcodes[i] = "??";
        }

        // Floating-point instructions
        OneByteOpcodes[0xD8] = "fadd";
        OneByteOpcodes[0xD9] = "fld";
        OneByteOpcodes[0xDA] = "fiadd";
        OneByteOpcodes[0xDB] = "fild";
        OneByteOpcodes[0xDC] = "fadd";
        OneByteOpcodes[0xDD] = "fld";
        OneByteOpcodes[0xDE] = "fiadd";
        OneByteOpcodes[0xDF] = "fistp";

        // Group 1 instructions (ADD, OR, ADC, SBB, AND, SUB, XOR, CMP)
        OneByteOpcodes[0x80] = "group1b";
        OneByteOpcodes[0x81] = "group1d";
        OneByteOpcodes[0x83] = "group1s"; // Sign-extended immediate

        // Data transfer instructions
        for (int i = 0x88; i <= 0x8B; i++)
        {
            OneByteOpcodes[i] = "mov";
        }

        OneByteOpcodes[0xA0] = "mov"; // MOV AL, moffs8
        OneByteOpcodes[0xA1] = "mov"; // MOV EAX, moffs32
        OneByteOpcodes[0xA2] = "mov"; // MOV moffs8, AL
        OneByteOpcodes[0xA3] = "mov"; // MOV moffs32, EAX

        // String instructions (no handler covers these, so the fallback path needs them)
        OneByteOpcodes[0xA4] = "movsb";
        OneByteOpcodes[0xA5] = "movsd";
        OneByteOpcodes[0xA6] = "cmpsb";
        OneByteOpcodes[0xA7] = "cmpsd";
        OneByteOpcodes[0xAA] = "stosb";
        OneByteOpcodes[0xAB] = "stosd";
        OneByteOpcodes[0xAC] = "lodsb";
        OneByteOpcodes[0xAD] = "lodsd";
        OneByteOpcodes[0xAE] = "scasb";
        OneByteOpcodes[0xAF] = "scasd";

        // Control flow instructions
        OneByteOpcodes[0xCC] = "int3";
        OneByteOpcodes[0xC2] = "ret";  // RET imm16
        OneByteOpcodes[0xC3] = "ret";
        OneByteOpcodes[0xE8] = "call";
        OneByteOpcodes[0xE9] = "jmp";
        OneByteOpcodes[0xEB] = "jmp";

        // Register operations
        for (int i = 0; i <= 7; i++)
        {
            OneByteOpcodes[0x40 + i] = "inc";
            OneByteOpcodes[0x48 + i] = "dec";
            OneByteOpcodes[0x50 + i] = "push";
            OneByteOpcodes[0x58 + i] = "pop";
        }

        // XCHG instructions
        OneByteOpcodes[0x90] = "nop"; // Special case: XCHG eax, eax = NOP
        for (int i = 1; i <= 7; i++)
        {
            OneByteOpcodes[0x90 + i] = "xchg";
        }

        // Sign-extension instructions
        OneByteOpcodes[0x98] = "cwde";
        OneByteOpcodes[0x99] = "cdq";

        // MOV instructions
        for (int i = 0; i <= 7; i++)
        {
            OneByteOpcodes[0xB0 + i] = "mov"; // MOV r8, imm8
            OneByteOpcodes[0xB8 + i] = "mov"; // MOV r32, imm32
        }

        // Conditional jumps
        for (int i = 0; i <= 0xF; i++)
        {
            OneByteOpcodes[0x70 + i] = "j" + ConditionCodes[i];
        }

        // Other common instructions
        OneByteOpcodes[0x68] = "push";  // PUSH imm32
        OneByteOpcodes[0x6A] = "push";  // PUSH imm8
        OneByteOpcodes[0xCD] = "int";   // INT imm8
        OneByteOpcodes[0xE3] = "jecxz"; // JECXZ rel8

        // Processor control and flag instructions
        OneByteOpcodes[0xF4] = "hlt";
        OneByteOpcodes[0xF5] = "cmc";
        OneByteOpcodes[0xF8] = "clc";
        OneByteOpcodes[0xF9] = "stc";
        OneByteOpcodes[0xFA] = "cli";
        OneByteOpcodes[0xFB] = "sti";
        OneByteOpcodes[0xFC] = "cld";
        OneByteOpcodes[0xFD] = "std";
    }

    /// <summary>
    /// Gets the mnemonic for a one-byte opcode
    /// </summary>
    /// <param name="opcode">The opcode</param>
    /// <returns>The mnemonic, or "??" when the opcode has no entry in the map</returns>
    public static string GetMnemonic(byte opcode)
    {
        return OneByteOpcodes[opcode];
    }

    /// <summary>
    /// Checks if the opcode is a Group 1 opcode
    /// </summary>
    /// <param name="opcode">The opcode to check</param>
    /// <returns>True if the opcode is 0x80, 0x81 or 0x83</returns>
    public static bool IsGroup1Opcode(byte opcode)
    {
        return opcode == 0x80 || opcode == 0x81 || opcode == 0x83;
    }

    /// <summary>
    /// Checks if the opcode is a floating-point instruction
    /// </summary>
    /// <param name="opcode">The opcode to check</param>
    /// <returns>True if the opcode lies in the range 0xD8-0xDF</returns>
    public static bool IsFloatingPointOpcode(byte opcode)
    {
        return opcode >= 0xD8 && opcode <= 0xDF;
    }
}