diff --git a/X86Disassembler/X86/Handlers/InstructionHandlerFactory.cs b/X86Disassembler/X86/Handlers/InstructionHandlerFactory.cs
index 24f9245..8a30c22 100644
--- a/X86Disassembler/X86/Handlers/InstructionHandlerFactory.cs
+++ b/X86Disassembler/X86/Handlers/InstructionHandlerFactory.cs
@@ -13,6 +13,7 @@ using X86Disassembler.X86.Handlers.Or;
using X86Disassembler.X86.Handlers.Pop;
using X86Disassembler.X86.Handlers.Push;
using X86Disassembler.X86.Handlers.Ret;
+using X86Disassembler.X86.Handlers.String;
using X86Disassembler.X86.Handlers.Test;
using X86Disassembler.X86.Handlers.Xchg;
using X86Disassembler.X86.Handlers.Xor;
@@ -41,32 +42,36 @@ public class InstructionHandlerFactory
_decoder = decoder;
_length = length;
- RegisterHandlers();
+ RegisterAllHandlers();
}
///
/// Registers all handlers
///
- private void RegisterHandlers()
+ private void RegisterAllHandlers()
{
// Register specific instruction handlers
_handlers.Add(new Int3Handler(_codeBuffer, _decoder, _length));
-
- RegisterArithmeticUnaryHandlers();
+
RegisterArithmeticImmediateHandlers();
- RegisterReturnHandlers();
- RegisterCallHandlers();
- RegisterJumpHandlers();
- RegisterTestHandlers();
+ RegisterArithmeticUnaryHandlers();
+ RegisterAddHandlers();
+ RegisterCmpHandlers();
RegisterXorHandlers();
RegisterOrHandlers();
- RegisterLeaHandlers();
- RegisterCmpHandlers();
+ RegisterTestHandlers();
+ RegisterDataTransferHandlers();
+ RegisterJumpHandlers();
+ RegisterCallHandlers();
+ RegisterReturnHandlers();
RegisterDecHandlers();
RegisterIncHandlers();
- RegisterAddHandlers();
- RegisterDataTransferHandlers();
+ RegisterPushHandlers();
+ RegisterPopHandlers();
+ RegisterLeaHandlers();
RegisterFloatingPointHandlers();
+ RegisterStringHandlers();
+ RegisterMovHandlers();
}
///
@@ -295,6 +300,51 @@ public class InstructionHandlerFactory
_handlers.Add(new LoadStoreInt16Handler(_codeBuffer, _decoder, _length));
}
+ /// <summary>
+ /// Registers the string instruction handlers (currently only RepMovsHandler)
+ /// </summary>
+ private void RegisterStringHandlers()
+ {
+ // Append the handler, built from the factory's code buffer, decoder and length
+ _handlers.Add(new RepMovsHandler(_codeBuffer, _decoder, _length));
+ }
+
+ /// <summary>
+ /// Registers the MOV instruction handlers (eight handler types, appended in the order below)
+ /// </summary>
+ private void RegisterMovHandlers()
+ {
+ // NOTE(review): confirm whether handler lookup depends on this registration order
+ _handlers.Add(new MovRegMemHandler(_codeBuffer, _decoder, _length));
+ _handlers.Add(new MovMemRegHandler(_codeBuffer, _decoder, _length));
+ _handlers.Add(new MovRegImm32Handler(_codeBuffer, _decoder, _length));
+ _handlers.Add(new MovRegImm8Handler(_codeBuffer, _decoder, _length));
+ _handlers.Add(new MovEaxMoffsHandler(_codeBuffer, _decoder, _length));
+ _handlers.Add(new MovMoffsEaxHandler(_codeBuffer, _decoder, _length));
+ _handlers.Add(new MovRm32Imm32Handler(_codeBuffer, _decoder, _length));
+ _handlers.Add(new MovRm8Imm8Handler(_codeBuffer, _decoder, _length));
+ }
+
+ /// <summary>
+ /// Registers the PUSH instruction handlers (register, 32-bit immediate and 8-bit immediate)
+ /// </summary>
+ private void RegisterPushHandlers()
+ {
+ // Each handler is built from the factory's code buffer, decoder and length
+ _handlers.Add(new PushRegHandler(_codeBuffer, _decoder, _length));
+ _handlers.Add(new PushImm32Handler(_codeBuffer, _decoder, _length));
+ _handlers.Add(new PushImm8Handler(_codeBuffer, _decoder, _length));
+ }
+
+ /// <summary>
+ /// Registers the POP instruction handlers (currently only PopRegHandler)
+ /// </summary>
+ private void RegisterPopHandlers()
+ {
+ // Append the handler, built from the factory's code buffer, decoder and length
+ _handlers.Add(new PopRegHandler(_codeBuffer, _decoder, _length));
+ }
+
///
/// Gets the handler that can decode the given opcode
///
diff --git a/X86Disassembler/X86/InstructionDecoder.cs b/X86Disassembler/X86/InstructionDecoder.cs
index a8e7725..5af5ddb 100644
--- a/X86Disassembler/X86/InstructionDecoder.cs
+++ b/X86Disassembler/X86/InstructionDecoder.cs
@@ -19,13 +19,10 @@ public class InstructionDecoder
// The instruction handler factory
private readonly InstructionHandlerFactory _handlerFactory;
- // Instruction prefixes
- private bool _operandSizePrefix;
- private bool _addressSizePrefix;
- private bool _segmentOverridePrefix;
- private bool _lockPrefix;
- private bool _repPrefix;
- private string _segmentOverride;
+ // Specialized decoders
+ private readonly PrefixDecoder _prefixDecoder;
+ private readonly ModRMDecoder _modRMDecoder;
+ private readonly StringInstructionDecoder _stringDecoder;
///
/// Initializes a new instance of the InstructionDecoder class
@@ -37,7 +34,11 @@ public class InstructionDecoder
_codeBuffer = codeBuffer;
_length = length;
_position = 0;
- _segmentOverride = "";
+
+ // Create specialized decoders
+ _prefixDecoder = new PrefixDecoder();
+ _modRMDecoder = new ModRMDecoder(codeBuffer, this, length);
+ _stringDecoder = new StringInstructionDecoder(codeBuffer, length);
// Create the instruction handler factory
_handlerFactory = new InstructionHandlerFactory(_codeBuffer, this, _length);
@@ -55,12 +56,7 @@ public class InstructionDecoder
}
// Reset prefix flags
- _operandSizePrefix = false;
- _addressSizePrefix = false;
- _segmentOverridePrefix = false;
- _lockPrefix = false;
- _repPrefix = false;
- _segmentOverride = string.Empty;
+ _prefixDecoder.Reset();
// Save the start position of the instruction
int startPosition = _position;
@@ -76,54 +72,21 @@ public class InstructionDecoder
{
byte prefix = _codeBuffer[_position];
- if (prefix == 0x66) // Operand size prefix
+ if (_prefixDecoder.DecodePrefix(prefix))
{
- _operandSizePrefix = true;
- _position++;
- }
- else if (prefix == 0x67) // Address size prefix
- {
- _addressSizePrefix = true;
- _position++;
- }
- else if ((prefix >= 0x26 && prefix <= 0x3E && (prefix & 0x7) == 0x6) || prefix == 0x64 || prefix == 0x65) // Segment override prefix
- {
- _segmentOverridePrefix = true;
- switch (prefix)
- {
- case 0x26: _segmentOverride = "es"; break;
- case 0x2E: _segmentOverride = "cs"; break;
- case 0x36: _segmentOverride = "ss"; break;
- case 0x3E: _segmentOverride = "ds"; break;
- case 0x64: _segmentOverride = "fs"; break;
- case 0x65: _segmentOverride = "gs"; break;
- }
- _position++;
- }
- else if (prefix == 0xF0) // LOCK prefix
- {
- _lockPrefix = true;
- _position++;
- }
- else if (prefix == 0xF2 || prefix == 0xF3) // REP/REPNE prefix
- {
- _repPrefix = true;
_position++;
- // Special case for string instructions
- if (_position < _length)
+ // Special case for REP/REPNE prefix followed by string instruction
+ if ((prefix == 0xF2 || prefix == 0xF3) && _position < _length)
{
- byte stringOp = _codeBuffer[_position];
- if (stringOp == 0xA4 || stringOp == 0xA5 || // MOVS
- stringOp == 0xAA || stringOp == 0xAB || // STOS
- stringOp == 0xAC || stringOp == 0xAD || // LODS
- stringOp == 0xAE || stringOp == 0xAF) // SCAS
+ byte nextByte = _codeBuffer[_position];
+ if (_stringDecoder.IsStringInstruction(nextByte))
{
// Skip the string operation opcode
_position++;
// Handle REP string instruction
- return CreateStringInstruction(prefix, stringOp, startPosition);
+ return _stringDecoder.CreateStringInstruction(prefix, nextByte, startPosition, _position);
}
}
}
@@ -137,9 +100,9 @@ public class InstructionDecoder
{
// If we reached the end of the buffer while processing prefixes,
// create an instruction with just the prefix information
- if (_segmentOverridePrefix)
+ if (_prefixDecoder.HasSegmentOverridePrefix())
{
- instruction.Mnemonic = _segmentOverride;
+ instruction.Mnemonic = _prefixDecoder.GetSegmentOverride();
instruction.Operands = "";
// Set the raw bytes
@@ -174,22 +137,9 @@ public class InstructionDecoder
instruction.Operands = "??";
}
- // Add REP prefix to the instruction if present
- if (_repPrefix && !instruction.Mnemonic.StartsWith("rep"))
- {
- instruction.Mnemonic = $"rep {instruction.Mnemonic}";
- }
-
- // Add segment override prefix to the instruction if present
- if (_segmentOverridePrefix && !string.IsNullOrEmpty(instruction.Operands))
- {
- // If the instruction has memory operands, add the segment override
- if (instruction.Operands.Contains("["))
- {
- // Replace the first '[' with the segment override
- instruction.Operands = instruction.Operands.Replace("[", $"{_segmentOverride}:[" );
- }
- }
+ // Apply prefixes to the instruction
+ instruction.Mnemonic = _prefixDecoder.ApplyRepPrefix(instruction.Mnemonic);
+ instruction.Operands = _prefixDecoder.ApplySegmentOverride(instruction.Operands);
// Set the raw bytes
int bytesLength = _position - startPosition;
@@ -199,62 +149,6 @@ public class InstructionDecoder
return instruction;
}
- ///
- /// Creates an instruction for a string operation with REP/REPNE prefix
- ///
- /// The REP/REPNE prefix (0xF2 or 0xF3)
- /// The string operation opcode
- /// The start position of the instruction
- /// The created instruction
- private Instruction CreateStringInstruction(byte prefix, byte stringOp, int startPosition)
- {
- // Create a new instruction
- Instruction instruction = new Instruction
- {
- Address = (uint)startPosition,
- };
-
- // Get the mnemonic for the string operation
- string mnemonic = OpcodeMap.GetMnemonic(stringOp);
- instruction.Mnemonic = prefix == 0xF3 ? $"rep {mnemonic}" : $"repne {mnemonic}";
-
- // Set operands based on the string operation
- switch (stringOp)
- {
- case 0xA4: // MOVSB
- instruction.Operands = "byte ptr [edi], byte ptr [esi]";
- break;
- case 0xA5: // MOVSD
- instruction.Operands = "dword ptr [edi], dword ptr [esi]";
- break;
- case 0xAA: // STOSB
- instruction.Operands = "byte ptr [edi], al";
- break;
- case 0xAB: // STOSD
- instruction.Operands = "dword ptr [edi], eax";
- break;
- case 0xAC: // LODSB
- instruction.Operands = "al, byte ptr [esi]";
- break;
- case 0xAD: // LODSD
- instruction.Operands = "eax, dword ptr [esi]";
- break;
- case 0xAE: // SCASB
- instruction.Operands = "al, byte ptr [edi]";
- break;
- case 0xAF: // SCASD
- instruction.Operands = "eax, dword ptr [edi]";
- break;
- }
-
- // Set the raw bytes
- int length = _position - startPosition;
- instruction.RawBytes = new byte[length];
- Array.Copy(_codeBuffer, startPosition, instruction.RawBytes, 0, length);
-
- return instruction;
- }
-
///
/// Gets the current position in the buffer
///
@@ -279,7 +173,7 @@ public class InstructionDecoder
/// True if the operand size prefix is present
public bool HasOperandSizePrefix()
{
- return _operandSizePrefix;
+ return _prefixDecoder.HasOperandSizePrefix();
}
///
@@ -288,7 +182,7 @@ public class InstructionDecoder
/// True if the address size prefix is present
public bool HasAddressSizePrefix()
{
- return _addressSizePrefix;
+ return _prefixDecoder.HasAddressSizePrefix();
}
///
@@ -297,7 +191,7 @@ public class InstructionDecoder
/// True if a segment override prefix is present
public bool HasSegmentOverridePrefix()
{
- return _segmentOverridePrefix;
+ return _prefixDecoder.HasSegmentOverridePrefix();
}
///
@@ -306,7 +200,7 @@ public class InstructionDecoder
/// The segment override prefix, or an empty string if none is present
public string GetSegmentOverride()
{
- return _segmentOverride;
+ return _prefixDecoder.GetSegmentOverride();
}
///
@@ -315,7 +209,7 @@ public class InstructionDecoder
/// True if the LOCK prefix is present
public bool HasLockPrefix()
{
- return _lockPrefix;
+ return _prefixDecoder.HasLockPrefix();
}
///
@@ -324,7 +218,7 @@ public class InstructionDecoder
/// True if the REP/REPNE prefix is present
public bool HasRepPrefix()
{
- return _repPrefix;
+ return _prefixDecoder.HasRepPrefix();
}
///
@@ -352,7 +246,7 @@ public class InstructionDecoder
return 0;
}
- ushort value = BitConverter.ToUInt16(_codeBuffer, _position);
+ ushort value = (ushort)(_codeBuffer[_position] | (_codeBuffer[_position + 1] << 8));
_position += 2;
return value;
}
@@ -368,7 +262,10 @@ public class InstructionDecoder
return 0;
}
- uint value = BitConverter.ToUInt32(_codeBuffer, _position);
+ uint value = (uint)(_codeBuffer[_position] |
+ (_codeBuffer[_position + 1] << 8) |
+ (_codeBuffer[_position + 2] << 16) |
+ (_codeBuffer[_position + 3] << 24));
_position += 4;
return value;
}
diff --git a/X86Disassembler/X86/PrefixDecoder.cs b/X86Disassembler/X86/PrefixDecoder.cs
new file mode 100644
index 0000000..39f7c8b
--- /dev/null
+++ b/X86Disassembler/X86/PrefixDecoder.cs
@@ -0,0 +1,161 @@
+namespace X86Disassembler.X86;
+
+/// <summary>
+/// Records the instruction prefixes seen for the instruction being decoded and
+/// applies their textual effect to the decoded mnemonic and operands.
+/// </summary>
+public class PrefixDecoder
+{
+    // Prefix flags
+    private bool _operandSizePrefix;
+    private bool _addressSizePrefix;
+    private bool _segmentOverridePrefix;
+    private bool _lockPrefix;
+    private bool _repPrefix;
+
+    // True when the most recent repeat prefix was 0xF2 (REPNE) rather than 0xF3 (REP)
+    private bool _repnePrefix;
+
+    // Name of the overriding segment register, or empty when none was seen
+    private string _segmentOverride = string.Empty;
+
+    /// <summary>
+    /// Initializes a new instance of the PrefixDecoder class with no prefixes recorded
+    /// </summary>
+    public PrefixDecoder()
+    {
+        Reset();
+    }
+
+    /// <summary>
+    /// Clears every prefix flag and the segment override name
+    /// </summary>
+    public void Reset()
+    {
+        _operandSizePrefix = false;
+        _addressSizePrefix = false;
+        _segmentOverridePrefix = false;
+        _lockPrefix = false;
+        _repPrefix = false;
+        _repnePrefix = false;
+        _segmentOverride = string.Empty;
+    }
+
+    /// <summary>
+    /// Records a prefix byte if it is one
+    /// </summary>
+    /// <param name="prefix">The candidate prefix byte</param>
+    /// <returns>True if the byte was a prefix, false otherwise</returns>
+    public bool DecodePrefix(byte prefix)
+    {
+        switch (prefix)
+        {
+            case 0x66: // Operand size prefix
+                _operandSizePrefix = true;
+                return true;
+            case 0x67: // Address size prefix
+                _addressSizePrefix = true;
+                return true;
+            case 0x26: return SetSegmentOverride("es");
+            case 0x2E: return SetSegmentOverride("cs");
+            case 0x36: return SetSegmentOverride("ss");
+            case 0x3E: return SetSegmentOverride("ds");
+            case 0x64: return SetSegmentOverride("fs");
+            case 0x65: return SetSegmentOverride("gs");
+            case 0xF0: // LOCK prefix
+                _lockPrefix = true;
+                return true;
+            case 0xF2: // REPNE/REPNZ prefix
+            case 0xF3: // REP/REPE prefix
+                _repPrefix = true;
+                // Remember which repeat prefix it was so the mnemonic is labelled correctly
+                _repnePrefix = prefix == 0xF2;
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    /// <summary>
+    /// Checks if the operand size prefix is present
+    /// </summary>
+    /// <returns>True if the operand size prefix is present</returns>
+    public bool HasOperandSizePrefix() => _operandSizePrefix;
+
+    /// <summary>
+    /// Checks if the address size prefix is present
+    /// </summary>
+    /// <returns>True if the address size prefix is present</returns>
+    public bool HasAddressSizePrefix() => _addressSizePrefix;
+
+    /// <summary>
+    /// Checks if a segment override prefix is present
+    /// </summary>
+    /// <returns>True if a segment override prefix is present</returns>
+    public bool HasSegmentOverridePrefix() => _segmentOverridePrefix;
+
+    /// <summary>
+    /// Gets the segment override prefix
+    /// </summary>
+    /// <returns>The segment register name, or an empty string if none is present</returns>
+    public string GetSegmentOverride() => _segmentOverride;
+
+    /// <summary>
+    /// Checks if the LOCK prefix is present
+    /// </summary>
+    /// <returns>True if the LOCK prefix is present</returns>
+    public bool HasLockPrefix() => _lockPrefix;
+
+    /// <summary>
+    /// Checks if a REP or REPNE prefix is present
+    /// </summary>
+    /// <returns>True if either repeat prefix (0xF2 or 0xF3) is present</returns>
+    public bool HasRepPrefix() => _repPrefix;
+
+    /// <summary>
+    /// Applies the segment override prefix to the operands string if applicable
+    /// </summary>
+    /// <param name="operands">The operands string</param>
+    /// <returns>The operands string with the segment override applied</returns>
+    public string ApplySegmentOverride(string operands)
+    {
+        if (!_segmentOverridePrefix || string.IsNullOrEmpty(operands))
+        {
+            return operands;
+        }
+
+        // Every '[' in the operand text gets the override, not only the first one
+        return operands.Replace("[", $"{_segmentOverride}:[");
+    }
+
+    /// <summary>
+    /// Applies the repeat prefix to the mnemonic if applicable
+    /// </summary>
+    /// <param name="mnemonic">The mnemonic</param>
+    /// <returns>
+    /// The mnemonic prefixed with "rep " (0xF3) or "repne " (0xF2), or the mnemonic
+    /// unchanged when no repeat prefix was seen or it already starts with "rep"
+    /// </returns>
+    public string ApplyRepPrefix(string mnemonic)
+    {
+        // Ordinal comparison: mnemonics are identifiers, not culture-sensitive text
+        if (!_repPrefix || mnemonic.StartsWith("rep", System.StringComparison.Ordinal))
+        {
+            return mnemonic;
+        }
+
+        return _repnePrefix ? $"repne {mnemonic}" : $"rep {mnemonic}";
+    }
+
+    /// <summary>
+    /// Records a segment override prefix
+    /// </summary>
+    /// <param name="segment">The segment register name</param>
+    /// <returns>Always true, so callers can return the result directly</returns>
+    private bool SetSegmentOverride(string segment)
+    {
+        _segmentOverridePrefix = true;
+        _segmentOverride = segment;
+        return true;
+    }
+}
diff --git a/X86Disassembler/X86/StringInstructionDecoder.cs b/X86Disassembler/X86/StringInstructionDecoder.cs
new file mode 100644
index 0000000..7c893d4
--- /dev/null
+++ b/X86Disassembler/X86/StringInstructionDecoder.cs
@@ -0,0 +1,92 @@
+namespace X86Disassembler.X86;
+
+/// <summary>
+/// Recognises string-operation opcodes and builds instructions for REP/REPNE-prefixed ones
+/// </summary>
+public class StringInstructionDecoder
+{
+    // The buffer containing the code to decode
+    private readonly byte[] _codeBuffer;
+
+    // The length of the buffer (stored; not read by any member of this class)
+    private readonly int _length;
+
+    /// <summary>
+    /// Initializes a new instance of the StringInstructionDecoder class
+    /// </summary>
+    /// <param name="codeBuffer">The buffer containing the code to decode</param>
+    /// <param name="length">The length of the buffer</param>
+    public StringInstructionDecoder(byte[] codeBuffer, int length)
+    {
+        _codeBuffer = codeBuffer;
+        _length = length;
+    }
+
+    /// <summary>
+    /// Tells whether an opcode is one of the MOVS, STOS, LODS or SCAS opcodes
+    /// </summary>
+    /// <param name="opcode">The opcode to check</param>
+    /// <returns>True for 0xA4, 0xA5 and 0xAA through 0xAF</returns>
+    public bool IsStringInstruction(byte opcode)
+    {
+        switch (opcode)
+        {
+            case 0xA4: // MOVS
+            case 0xA5:
+            case 0xAA: // STOS
+            case 0xAB:
+            case 0xAC: // LODS
+            case 0xAD:
+            case 0xAE: // SCAS
+            case 0xAF:
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    /// <summary>
+    /// Creates an instruction for a string operation with REP/REPNE prefix
+    /// </summary>
+    /// <param name="prefix">The REP/REPNE prefix (0xF2 or 0xF3)</param>
+    /// <param name="stringOp">The string operation opcode</param>
+    /// <param name="startPosition">The start position of the instruction</param>
+    /// <param name="currentPosition">The position just past the string opcode</param>
+    /// <returns>The created instruction</returns>
+    public Instruction CreateStringInstruction(byte prefix, byte stringOp, int startPosition, int currentPosition)
+    {
+        // 0xF3 is rendered as "rep"; any other prefix value is rendered as "repne"
+        string repeatKeyword = prefix == 0xF3 ? "rep" : "repne";
+        string baseMnemonic = OpcodeMap.GetMnemonic(stringOp);
+
+        // Raw bytes span from the start position up to (not including) the current position
+        byte[] rawBytes = new byte[currentPosition - startPosition];
+        Array.Copy(_codeBuffer, startPosition, rawBytes, 0, rawBytes.Length);
+
+        return new Instruction
+        {
+            Address = (uint)startPosition,
+            Mnemonic = $"{repeatKeyword} {baseMnemonic}",
+            Operands = GetStringOperands(stringOp),
+            RawBytes = rawBytes,
+        };
+    }
+
+    /// <summary>
+    /// Gets the operands for a string instruction
+    /// </summary>
+    /// <param name="stringOp">The string operation opcode</param>
+    /// <returns>The operands string, or "??" for an unrecognised opcode</returns>
+    private string GetStringOperands(byte stringOp) => stringOp switch
+    {
+        0xA4 => "byte ptr [edi], byte ptr [esi]", // MOVSB
+        0xA5 => "dword ptr [edi], dword ptr [esi]", // MOVSD
+        0xAA => "byte ptr [edi], al", // STOSB
+        0xAB => "dword ptr [edi], eax", // STOSD
+        0xAC => "al, byte ptr [esi]", // LODSB
+        0xAD => "eax, dword ptr [esi]", // LODSD
+        0xAE => "al, byte ptr [edi]", // SCASB
+        0xAF => "eax, dword ptr [edi]", // SCASD
+        _ => "??",
+    };
+}