diff --git a/X86Disassembler/PE/OptionalHeader.cs b/X86Disassembler/PE/OptionalHeader.cs index ff335a1..50ab235 100644 --- a/X86Disassembler/PE/OptionalHeader.cs +++ b/X86Disassembler/PE/OptionalHeader.cs @@ -21,7 +21,7 @@ public class OptionalHeader public uint BaseOfData; // Base of data section (PE32 only) // Windows-specific fields - public object ImageBase; // Image base address (uint for PE32, ulong for PE32+) + public ulong ImageBase; // Image base address (uint for PE32, ulong for PE32+) public uint SectionAlignment; // Section alignment public uint FileAlignment; // File alignment public ushort MajorOperatingSystemVersion; // Major OS version @@ -36,10 +36,10 @@ public class OptionalHeader public uint CheckSum; // Checksum public ushort Subsystem; // Subsystem public ushort DllCharacteristics; // DLL characteristics - public object SizeOfStackReserve; // Size of stack reserve (uint for PE32, ulong for PE32+) - public object SizeOfStackCommit; // Size of stack commit (uint for PE32, ulong for PE32+) - public object SizeOfHeapReserve; // Size of heap reserve (uint for PE32, ulong for PE32+) - public object SizeOfHeapCommit; // Size of heap commit (uint for PE32, ulong for PE32+) + public ulong SizeOfStackReserve; // Size of stack reserve (uint for PE32, ulong for PE32+) + public ulong SizeOfStackCommit; // Size of stack commit (uint for PE32, ulong for PE32+) + public ulong SizeOfHeapReserve; // Size of heap reserve (uint for PE32, ulong for PE32+) + public ulong SizeOfHeapCommit; // Size of heap commit (uint for PE32, ulong for PE32+) public uint LoaderFlags; // Loader flags public uint NumberOfRvaAndSizes; // Number of RVA and sizes diff --git a/X86Disassembler/PEFormat.cs b/X86Disassembler/PEFormat.cs deleted file mode 100644 index 7cc98e5..0000000 --- a/X86Disassembler/PEFormat.cs +++ /dev/null @@ -1,895 +0,0 @@ -using System; -using System.Collections.Generic; -using System.IO; -using System.Runtime.InteropServices; -using System.Text; - -namespace X86Disassembler -{ - /// - /// Represents a Portable Executable (PE) file format parser - /// - public class PEFormat - { - // DOS Header constants - private const ushort DOS_SIGNATURE = 0x5A4D; // 'MZ' - private const uint PE_SIGNATURE = 0x00004550; // 'PE\0\0' - - // Optional Header Magic values - private const ushort PE32_MAGIC = 0x10B; // 32-bit executable - private const ushort PE32PLUS_MAGIC = 0x20B; // 64-bit executable - - // Section characteristics flags - private const uint IMAGE_SCN_CNT_CODE = 0x00000020; // Section contains code - private const uint IMAGE_SCN_MEM_EXECUTE = 0x20000000; // Section is executable - private const uint IMAGE_SCN_MEM_READ = 0x40000000; // Section is readable - private const uint IMAGE_SCN_MEM_WRITE = 0x80000000; // Section is writable - - // Data directories - private const int IMAGE_DIRECTORY_ENTRY_EXPORT = 0; // Export Directory - private const int IMAGE_DIRECTORY_ENTRY_IMPORT = 1; // Import Directory - private const int IMAGE_DIRECTORY_ENTRY_RESOURCE = 2; // Resource Directory - private const int IMAGE_DIRECTORY_ENTRY_EXCEPTION = 3; // Exception Directory - private const int IMAGE_DIRECTORY_ENTRY_SECURITY = 4; // Security Directory - private const int IMAGE_DIRECTORY_ENTRY_BASERELOC = 5; // Base Relocation Table - private const int IMAGE_DIRECTORY_ENTRY_DEBUG = 6; // Debug Directory - private const int IMAGE_DIRECTORY_ENTRY_ARCHITECTURE = 7; // Architecture Specific Data - private const int IMAGE_DIRECTORY_ENTRY_GLOBALPTR = 8; // RVA of GP - private const int IMAGE_DIRECTORY_ENTRY_TLS = 9; // TLS Directory - private const int IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG = 10; // Load Configuration Directory - private const int IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT = 11; // Bound Import Directory - private const int IMAGE_DIRECTORY_ENTRY_IAT = 12; // Import Address Table - private const int IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT = 13; // Delay Load Import Descriptors - private const int IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR = 14; // COM Runtime descriptor - - // PE file data - private byte[] _fileData; - - // Parsed headers - public DOSHeader DosHeader { get; private set; } - public FileHeader FileHeader { get; private set; } - public OptionalHeader OptionalHeader { get; private set; } - public List SectionHeaders { get; private set; } - public bool Is64Bit { get; private set; } - - // Export and Import information - public ExportDirectory ExportDirectory { get; private set; } - public List ExportedFunctions { get; private set; } - public List ImportDescriptors { get; private set; } - - /// - /// Parses a PE file from the given byte array - /// - /// The raw file data - public PEFormat(byte[] fileData) - { - _fileData = fileData; - SectionHeaders = new List(); - ExportedFunctions = new List(); - ImportDescriptors = new List(); - Parse(); - } - - /// - /// Parses the PE file structure - /// - private void Parse() - { - using (MemoryStream stream = new MemoryStream(_fileData)) - using (BinaryReader reader = new BinaryReader(stream)) - { - // Parse DOS header - DosHeader = ParseDOSHeader(reader); - - // Move to PE header - reader.BaseStream.Seek(DosHeader.e_lfanew, SeekOrigin.Begin); - - // Verify PE signature - uint peSignature = reader.ReadUInt32(); - if (peSignature != PE_SIGNATURE) - { - throw new InvalidDataException("Invalid PE signature"); - } - - // Parse File Header - FileHeader = ParseFileHeader(reader); - - // Parse Optional Header - OptionalHeader = ParseOptionalHeader(reader); - - // Parse Section Headers - for (int i = 0; i < FileHeader.NumberOfSections; i++) - { - SectionHeaders.Add(ParseSectionHeader(reader)); - } - - // Parse Export Directory - if (OptionalHeader.DataDirectories.Length > IMAGE_DIRECTORY_ENTRY_EXPORT && - OptionalHeader.DataDirectories[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress != 0) - { - ExportDirectory = ParseExportDirectory(reader, OptionalHeader.DataDirectories[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress); - ParseExportedFunctions(reader); - } - - // Parse Import Descriptors - if (OptionalHeader.DataDirectories.Length > IMAGE_DIRECTORY_ENTRY_IMPORT && - OptionalHeader.DataDirectories[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress != 0) - { - ImportDescriptors = ParseImportDescriptors(reader, OptionalHeader.DataDirectories[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress); - } - } - } - - /// - /// Parses the DOS header - /// - private DOSHeader ParseDOSHeader(BinaryReader reader) - { - DOSHeader header = new DOSHeader(); - - header.e_magic = reader.ReadUInt16(); - if (header.e_magic != DOS_SIGNATURE) - { - throw new InvalidDataException("Invalid DOS signature (MZ)"); - } - - header.e_cblp = reader.ReadUInt16(); - header.e_cp = reader.ReadUInt16(); - header.e_crlc = reader.ReadUInt16(); - header.e_cparhdr = reader.ReadUInt16(); - header.e_minalloc = reader.ReadUInt16(); - header.e_maxalloc = reader.ReadUInt16(); - header.e_ss = reader.ReadUInt16(); - header.e_sp = reader.ReadUInt16(); - header.e_csum = reader.ReadUInt16(); - header.e_ip = reader.ReadUInt16(); - header.e_cs = reader.ReadUInt16(); - header.e_lfarlc = reader.ReadUInt16(); - header.e_ovno = reader.ReadUInt16(); - - header.e_res = new ushort[4]; - for (int i = 0; i < 4; i++) - { - header.e_res[i] = reader.ReadUInt16(); - } - - header.e_oemid = reader.ReadUInt16(); - header.e_oeminfo = reader.ReadUInt16(); - - header.e_res2 = new ushort[10]; - for (int i = 0; i < 10; i++) - { - header.e_res2[i] = reader.ReadUInt16(); - } - - header.e_lfanew = reader.ReadUInt32(); - - return header; - } - - /// - /// Parses the File header - /// - private FileHeader ParseFileHeader(BinaryReader reader) - { - FileHeader header = new FileHeader(); - - header.Machine = reader.ReadUInt16(); - header.NumberOfSections = reader.ReadUInt16(); - header.TimeDateStamp = reader.ReadUInt32(); - header.PointerToSymbolTable = reader.ReadUInt32(); - header.NumberOfSymbols = reader.ReadUInt32(); - header.SizeOfOptionalHeader = reader.ReadUInt16(); - header.Characteristics = reader.ReadUInt16(); - - return header; - } - - /// - /// Parses the Optional header - /// - private OptionalHeader ParseOptionalHeader(BinaryReader reader) - { - OptionalHeader header = new OptionalHeader(); - - // Standard fields - header.Magic = reader.ReadUInt16(); - - // Determine if this is a PE32 or PE32+ file - Is64Bit = header.Magic == PE32PLUS_MAGIC; - - header.MajorLinkerVersion = reader.ReadByte(); - header.MinorLinkerVersion = reader.ReadByte(); - header.SizeOfCode = reader.ReadUInt32(); - header.SizeOfInitializedData = reader.ReadUInt32(); - header.SizeOfUninitializedData = reader.ReadUInt32(); - header.AddressOfEntryPoint = reader.ReadUInt32(); - header.BaseOfCode = reader.ReadUInt32(); - - // PE32 has BaseOfData, PE32+ doesn't - if (!Is64Bit) - { - header.BaseOfData = reader.ReadUInt32(); - } - - // Windows-specific fields - if (Is64Bit) - { - header.ImageBase = reader.ReadUInt64(); - } - else - { - header.ImageBase = reader.ReadUInt32(); - } - - header.SectionAlignment = reader.ReadUInt32(); - header.FileAlignment = reader.ReadUInt32(); - header.MajorOperatingSystemVersion = reader.ReadUInt16(); - header.MinorOperatingSystemVersion = reader.ReadUInt16(); - header.MajorImageVersion = reader.ReadUInt16(); - header.MinorImageVersion = reader.ReadUInt16(); - header.MajorSubsystemVersion = reader.ReadUInt16(); - header.MinorSubsystemVersion = reader.ReadUInt16(); - header.Win32VersionValue = reader.ReadUInt32(); - header.SizeOfImage = reader.ReadUInt32(); - header.SizeOfHeaders = reader.ReadUInt32(); - header.CheckSum = reader.ReadUInt32(); - header.Subsystem = reader.ReadUInt16(); - header.DllCharacteristics = reader.ReadUInt16(); - - // Size fields differ between PE32 and PE32+ - if (Is64Bit) - { - header.SizeOfStackReserve = reader.ReadUInt64(); - header.SizeOfStackCommit = reader.ReadUInt64(); - header.SizeOfHeapReserve = reader.ReadUInt64(); - header.SizeOfHeapCommit = reader.ReadUInt64(); - } - else - { - header.SizeOfStackReserve = reader.ReadUInt32(); - header.SizeOfStackCommit = reader.ReadUInt32(); - header.SizeOfHeapReserve = reader.ReadUInt32(); - header.SizeOfHeapCommit = reader.ReadUInt32(); - } - - header.LoaderFlags = reader.ReadUInt32(); - header.NumberOfRvaAndSizes = reader.ReadUInt32(); - - // Data directories - int numDirectories = (int)Math.Min(header.NumberOfRvaAndSizes, 16); // Maximum of 16 directories - header.DataDirectories = new DataDirectory[numDirectories]; - - for (int i = 0; i < numDirectories; i++) - { - DataDirectory dir = new DataDirectory(); - dir.VirtualAddress = reader.ReadUInt32(); - dir.Size = reader.ReadUInt32(); - header.DataDirectories[i] = dir; - } - - return header; - } - - /// - /// Parses a section header - /// - private SectionHeader ParseSectionHeader(BinaryReader reader) - { - SectionHeader header = new SectionHeader(); - - // Read section name (8 bytes) - byte[] nameBytes = reader.ReadBytes(8); - // Convert to string, removing any null characters - header.Name = Encoding.ASCII.GetString(nameBytes).TrimEnd('\0'); - - header.VirtualSize = reader.ReadUInt32(); - header.VirtualAddress = reader.ReadUInt32(); - header.SizeOfRawData = reader.ReadUInt32(); - header.PointerToRawData = reader.ReadUInt32(); - header.PointerToRelocations = reader.ReadUInt32(); - header.PointerToLinenumbers = reader.ReadUInt32(); - header.NumberOfRelocations = reader.ReadUInt16(); - header.NumberOfLinenumbers = reader.ReadUInt16(); - header.Characteristics = reader.ReadUInt32(); - - return header; - } - - /// - /// Parses the Export Directory - /// - private ExportDirectory ParseExportDirectory(BinaryReader reader, uint rva) - { - ExportDirectory directory = new ExportDirectory(); - - reader.BaseStream.Seek(RvaToOffset(rva), SeekOrigin.Begin); - - directory.Characteristics = reader.ReadUInt32(); - directory.TimeDateStamp = reader.ReadUInt32(); - directory.MajorVersion = reader.ReadUInt16(); - directory.MinorVersion = reader.ReadUInt16(); - directory.Name = reader.ReadUInt32(); - directory.Base = reader.ReadUInt32(); - directory.NumberOfFunctions = reader.ReadUInt32(); - directory.NumberOfNames = reader.ReadUInt32(); - directory.AddressOfFunctions = reader.ReadUInt32(); - directory.AddressOfNames = reader.ReadUInt32(); - directory.AddressOfNameOrdinals = reader.ReadUInt32(); - - // Read the DLL name - try - { - uint dllNameRVA = directory.Name; - uint dllNameOffset = RvaToOffset(dllNameRVA); - reader.BaseStream.Seek(dllNameOffset, SeekOrigin.Begin); - - // Read the null-terminated ASCII string - StringBuilder nameBuilder = new StringBuilder(); - byte b; - - while ((b = reader.ReadByte()) != 0) - { - nameBuilder.Append((char)b); - } - - directory.DllName = nameBuilder.ToString(); - } - catch (Exception) - { - directory.DllName = "Unknown"; - } - - return directory; - } - - /// - /// Parses the Import Descriptors - /// - private List ParseImportDescriptors(BinaryReader reader, uint rva) - { - List descriptors = new List(); - - try - { - uint importTableOffset = RvaToOffset(rva); - reader.BaseStream.Seek(importTableOffset, SeekOrigin.Begin); - - int descriptorCount = 0; - - while (true) - { - descriptorCount++; - - // Read the import descriptor - uint originalFirstThunk = reader.ReadUInt32(); - uint timeDateStamp = reader.ReadUInt32(); - uint forwarderChain = reader.ReadUInt32(); - uint nameRva = reader.ReadUInt32(); - uint firstThunk = reader.ReadUInt32(); - - // Check if we've reached the end of the import descriptors - if (originalFirstThunk == 0 && nameRva == 0 && firstThunk == 0) - { - break; - } - - ImportDescriptor descriptor = new ImportDescriptor - { - OriginalFirstThunk = originalFirstThunk, - TimeDateStamp = timeDateStamp, - ForwarderChain = forwarderChain, - Name = nameRva, - FirstThunk = firstThunk, - DllName = "Unknown" // Default name in case we can't read it - }; - - // Try to read the DLL name - try - { - if (nameRva != 0) - { - uint nameOffset = RvaToOffset(nameRva); - reader.BaseStream.Seek(nameOffset, SeekOrigin.Begin); - - // Read the null-terminated ASCII string - StringBuilder nameBuilder = new StringBuilder(); - byte b; - - while ((b = reader.ReadByte()) != 0) - { - nameBuilder.Append((char)b); - } - - descriptor.DllName = nameBuilder.ToString(); - } - } - catch (Exception) - { - // If we can't read the name, keep the default "Unknown" - } - - // Parse the imported functions - ParseImportedFunctions(reader, descriptor); - - descriptors.Add(descriptor); - - // Return to the import table to read the next descriptor - reader.BaseStream.Seek(importTableOffset + (descriptorCount * 20), SeekOrigin.Begin); - } - } - catch (Exception ex) - { - Console.WriteLine($"Error parsing import descriptors: {ex.Message}"); - // Return whatever descriptors we've managed to parse - } - - return descriptors; - } - - /// - /// Parses the imported functions for a given import descriptor - /// - private void ParseImportedFunctions(BinaryReader reader, ImportDescriptor descriptor) - { - try - { - // Use OriginalFirstThunk if available, otherwise use FirstThunk - uint thunkRva = descriptor.OriginalFirstThunk != 0 ? descriptor.OriginalFirstThunk : descriptor.FirstThunk; - - if (thunkRva == 0) - { - return; // No functions to parse - } - - uint thunkOffset = RvaToOffset(thunkRva); - int functionCount = 0; - - while (true) - { - reader.BaseStream.Seek(thunkOffset + (functionCount * 4), SeekOrigin.Begin); - uint thunkData = reader.ReadUInt32(); - - if (thunkData == 0) - { - break; // End of the function list - } - - ImportedFunction function = new ImportedFunction - { - ThunkRVA = thunkRva + (uint)(functionCount * 4) - }; - - // Check if imported by ordinal (high bit set) - if ((thunkData & 0x80000000) != 0) - { - function.IsOrdinal = true; - function.Ordinal = (ushort)(thunkData & 0xFFFF); - function.Name = $"Ordinal_{function.Ordinal}"; - } - else - { - // Imported by name - the thunkData is an RVA to a hint/name structure - try - { - uint hintNameOffset = RvaToOffset(thunkData); - reader.BaseStream.Seek(hintNameOffset, SeekOrigin.Begin); - - // Read the hint (2 bytes) - function.Hint = reader.ReadUInt16(); - - // Read the function name (null-terminated ASCII string) - StringBuilder nameBuilder = new StringBuilder(); - byte b; - - while ((b = reader.ReadByte()) != 0) - { - nameBuilder.Append((char)b); - } - - function.Name = nameBuilder.ToString(); - - if (string.IsNullOrEmpty(function.Name)) - { - function.Name = $"Function_at_{thunkData:X8}"; - } - } - catch (Exception) - { - function.Name = $"Function_at_{thunkData:X8}"; - } - } - - descriptor.Functions.Add(function); - functionCount++; - } - } - catch (Exception ex) - { - Console.WriteLine($"Error parsing imported functions for {descriptor.DllName}: {ex.Message}"); - } - } - - /// - /// Parses the exported functions using the export directory information - /// - private void ParseExportedFunctions(BinaryReader reader) - { - if (ExportDirectory == null) - { - return; - } - - // Read the array of function addresses (RVAs) - uint[] functionRVAs = new uint[ExportDirectory.NumberOfFunctions]; - reader.BaseStream.Seek(RvaToOffset(ExportDirectory.AddressOfFunctions), SeekOrigin.Begin); - for (int i = 0; i < ExportDirectory.NumberOfFunctions; i++) - { - functionRVAs[i] = reader.ReadUInt32(); - } - - // Read the array of name RVAs - uint[] nameRVAs = new uint[ExportDirectory.NumberOfNames]; - reader.BaseStream.Seek(RvaToOffset(ExportDirectory.AddressOfNames), SeekOrigin.Begin); - for (int i = 0; i < ExportDirectory.NumberOfNames; i++) - { - nameRVAs[i] = reader.ReadUInt32(); - } - - // Read the array of name ordinals - ushort[] nameOrdinals = new ushort[ExportDirectory.NumberOfNames]; - reader.BaseStream.Seek(RvaToOffset(ExportDirectory.AddressOfNameOrdinals), SeekOrigin.Begin); - for (int i = 0; i < ExportDirectory.NumberOfNames; i++) - { - nameOrdinals[i] = reader.ReadUInt16(); - } - - // Create a dictionary to map ordinals to names - Dictionary ordinalToName = new Dictionary(); - for (int i = 0; i < ExportDirectory.NumberOfNames; i++) - { - // Read the function name - reader.BaseStream.Seek(RvaToOffset(nameRVAs[i]), SeekOrigin.Begin); - List nameBytes = new List(); - byte b; - while ((b = reader.ReadByte()) != 0) - { - nameBytes.Add(b); - } - string name = Encoding.ASCII.GetString(nameBytes.ToArray()); - - // Map the ordinal to the name - ordinalToName[nameOrdinals[i]] = name; - } - - // Create the exported functions - for (ushort i = 0; i < ExportDirectory.NumberOfFunctions; i++) - { - uint functionRVA = functionRVAs[i]; - if (functionRVA == 0) - { - continue; // Skip empty entries - } - - ExportedFunction function = new ExportedFunction(); - function.Ordinal = (ushort)(i + ExportDirectory.Base); - function.Address = functionRVA; - - // Check if this function has a name - if (ordinalToName.TryGetValue(i, out string name)) - { - function.Name = name; - } - else - { - function.Name = $"Ordinal_{function.Ordinal}"; - } - - // Check if this is a forwarder - uint exportDirStart = OptionalHeader.DataDirectories[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress; - uint exportDirEnd = exportDirStart + OptionalHeader.DataDirectories[IMAGE_DIRECTORY_ENTRY_EXPORT].Size; - - if (functionRVA >= exportDirStart && functionRVA < exportDirEnd) - { - function.IsForwarder = true; - - // Read the forwarder string - reader.BaseStream.Seek(RvaToOffset(functionRVA), SeekOrigin.Begin); - List forwarderBytes = new List(); - byte b; - while ((b = reader.ReadByte()) != 0) - { - forwarderBytes.Add(b); - } - function.ForwarderName = Encoding.ASCII.GetString(forwarderBytes.ToArray()); - } - - ExportedFunctions.Add(function); - } - } - - /// - /// Gets the raw data for a specific section - /// - /// Index of the section - /// Byte array containing the section data - public byte[] GetSectionData(int sectionIndex) - { - if (sectionIndex < 0 || sectionIndex >= SectionHeaders.Count) - { - throw new ArgumentOutOfRangeException(nameof(sectionIndex)); - } - - SectionHeader section = SectionHeaders[sectionIndex]; - byte[] sectionData = new byte[section.SizeOfRawData]; - - Array.Copy(_fileData, section.PointerToRawData, sectionData, 0, section.SizeOfRawData); - - return sectionData; - } - - /// - /// Gets the raw data for a section by name - /// - /// Name of the section - /// Byte array containing the section data - public byte[] GetSectionData(string sectionName) - { - for (int i = 0; i < SectionHeaders.Count; i++) - { - if (SectionHeaders[i].Name == sectionName) - { - return GetSectionData(i); - } - } - - throw new ArgumentException($"Section '{sectionName}' not found"); - } - - /// - /// Checks if a section contains code - /// - /// The section to check - /// True if the section contains code, false otherwise - public bool IsSectionContainsCode(SectionHeader section) - { - return (section.Characteristics & IMAGE_SCN_CNT_CODE) != 0 || - (section.Characteristics & IMAGE_SCN_MEM_EXECUTE) != 0; - } - - /// - /// Gets all code sections - /// - /// List of section indices that contain code - public List GetCodeSections() - { - List codeSections = new List(); - - for (int i = 0; i < SectionHeaders.Count; i++) - { - if (IsSectionContainsCode(SectionHeaders[i])) - { - codeSections.Add(i); - } - } - - return codeSections; - } - - /// - /// Converts a Relative Virtual Address (RVA) to a file offset - /// - /// The RVA to convert - /// The corresponding file offset - public uint RvaToOffset(uint rva) - { - if (rva == 0) - { - return 0; - } - - foreach (var section in SectionHeaders) - { - // Check if the RVA is within this section - if (rva >= section.VirtualAddress && rva < section.VirtualAddress + section.VirtualSize) - { - // Calculate the offset within the section - uint offsetInSection = rva - section.VirtualAddress; - - // Make sure we don't exceed the raw data size - if (offsetInSection < section.SizeOfRawData) - { - return section.PointerToRawData + offsetInSection; - } - } - } - - // If the RVA is not within any section, it might be in the headers - if (rva < OptionalHeader.SizeOfHeaders) - { - return rva; - } - - throw new ArgumentException($"RVA {rva:X8} is not within any section"); - } - } - - #region PE Format Structures - - /// - /// DOS Header structure - /// - public class DOSHeader - { - public ushort e_magic; // Magic number ("MZ") - public ushort e_cblp; // Bytes on last page of file - public ushort e_cp; // Pages in file - public ushort e_crlc; // Relocations - public ushort e_cparhdr; // Size of header in paragraphs - public ushort e_minalloc; // Minimum extra paragraphs needed - public ushort e_maxalloc; // Maximum extra paragraphs needed - public ushort e_ss; // Initial (relative) SS value - public ushort e_sp; // Initial SP value - public ushort e_csum; // Checksum - public ushort e_ip; // Initial IP value - public ushort e_cs; // Initial (relative) CS value - public ushort e_lfarlc; // File address of relocation table - public ushort e_ovno; // Overlay number - public ushort[] e_res; // Reserved words - public ushort e_oemid; // OEM identifier - public ushort e_oeminfo; // OEM information - public ushort[] e_res2; // Reserved words - public uint e_lfanew; // File address of new exe header - } - - /// - /// File Header structure - /// - public class FileHeader - { - public ushort Machine; // Target machine type - public ushort NumberOfSections; // Number of sections - public uint TimeDateStamp; // Time stamp - public uint PointerToSymbolTable; // File offset of symbol table - public uint NumberOfSymbols; // Number of symbols - public ushort SizeOfOptionalHeader; // Size of optional header - public ushort Characteristics; // Characteristics - } - - /// - /// Optional Header structure - /// - public class OptionalHeader - { - // Standard fields - public ushort Magic; // Magic number (PE32 or PE32+) - public byte MajorLinkerVersion; // Major linker version - public byte MinorLinkerVersion; // Minor linker version - public uint SizeOfCode; // Size of code section - public uint SizeOfInitializedData; // Size of initialized data - public uint SizeOfUninitializedData; // Size of uninitialized data - public uint AddressOfEntryPoint; // Entry point RVA - public uint BaseOfCode; // Base of code section - public uint BaseOfData; // Base of data section (PE32 only) - - // Windows-specific fields - public dynamic ImageBase; // Preferred image base (uint for PE32, ulong for PE32+) - public uint SectionAlignment; // Section alignment - public uint FileAlignment; // File alignment - public ushort MajorOperatingSystemVersion; // Major OS version - public ushort MinorOperatingSystemVersion; // Minor OS version - public ushort MajorImageVersion; // Major image version - public ushort MinorImageVersion; // Minor image version - public ushort MajorSubsystemVersion; // Major subsystem version - public ushort MinorSubsystemVersion; // Minor subsystem version - public uint Win32VersionValue; // Win32 version value - public uint SizeOfImage; // Size of image - public uint SizeOfHeaders; // Size of headers - public uint CheckSum; // Checksum - public ushort Subsystem; // Subsystem - public ushort DllCharacteristics; // DLL characteristics - public dynamic SizeOfStackReserve; // Size of stack reserve (uint for PE32, ulong for PE32+) - public dynamic SizeOfStackCommit; // Size of stack commit (uint for PE32, ulong for PE32+) - public dynamic SizeOfHeapReserve; // Size of heap reserve (uint for PE32, ulong for PE32+) - public dynamic SizeOfHeapCommit; // Size of heap commit (uint for PE32, ulong for PE32+) - public uint LoaderFlags; // Loader flags - public uint NumberOfRvaAndSizes; // Number of data directories - - // Data directories - public DataDirectory[] DataDirectories; // Data directories - } - - /// - /// Data Directory structure - /// - public class DataDirectory - { - public uint VirtualAddress; // RVA of the directory - public uint Size; // Size of the directory - } - - /// - /// Section Header structure - /// - public class SectionHeader - { - public string Name; // Section name - public uint VirtualSize; // Virtual size - public uint VirtualAddress; // Virtual address (RVA) - public uint SizeOfRawData; // Size of raw data - public uint PointerToRawData; // File pointer to raw data - public uint PointerToRelocations; // File pointer to relocations - public uint PointerToLinenumbers; // File pointer to line numbers - public ushort NumberOfRelocations; // Number of relocations - public ushort NumberOfLinenumbers; // Number of line numbers - public uint Characteristics; // Characteristics - } - - #endregion - - #region Export and Import Structures - - /// - /// Export Directory structure - /// - public class ExportDirectory - { - public uint Characteristics; - public uint TimeDateStamp; - public ushort MajorVersion; - public ushort MinorVersion; - public uint Name; // RVA to the DLL name - public string DllName; // Actual DLL name - public uint Base; // Ordinal base - public uint NumberOfFunctions; // Number of exported functions - public uint NumberOfNames; // Number of exported names - public uint AddressOfFunctions; // RVA to function addresses - public uint AddressOfNames; // RVA to function names - public uint AddressOfNameOrdinals; // RVA to ordinals - } - - /// - /// Represents an exported function - /// - public class ExportedFunction - { - public string Name; // Function name - public uint Address; // Function RVA - public ushort Ordinal; // Function ordinal - public bool IsForwarder; // True if this is a forwarder - public string ForwarderName; // Name of the forwarded function (if IsForwarder is true) - } - - /// - /// Import Descriptor structure - /// - public class ImportDescriptor - { - public uint OriginalFirstThunk; // RVA to Import Lookup Table - public uint TimeDateStamp; - public uint ForwarderChain; - public uint Name; // RVA to the DLL name - public string DllName; // Actual DLL name - public uint FirstThunk; // RVA to Import Address Table - public List Functions; // List of imported functions - - public ImportDescriptor() - { - Functions = new List(); - } - } - - /// - /// Represents an imported function - /// - public class ImportedFunction - { - public bool IsOrdinal; // True if imported by ordinal - public ushort Ordinal; // Ordinal value (if IsOrdinal is true) - public string Name; // Function name (if IsOrdinal is false) - public ushort Hint; // Hint value (if IsOrdinal is false) - public uint ThunkRVA; // RVA in the Import Address Table - } - - #endregion -} diff --git a/X86Disassembler/Program.cs b/X86Disassembler/Program.cs index 1077eeb..7dd46bf 100644 --- a/X86Disassembler/Program.cs +++ b/X86Disassembler/Program.cs @@ -1,144 +1,207 @@ -using X86Disassembler.PE; - namespace X86Disassembler; +using System; +using System.IO; +using System.Text; +using System.Collections.Generic; +using X86Disassembler.PE; +using X86Disassembler.X86; + internal class Program { // Path to the DLL file to disassemble private const string DllPath = @"C:\Program Files (x86)\Nikita\Iron Strategy\Terrain.dll"; // Example path, replace with your target DLL - + + // Maximum number of instructions to display per section + private const int MaxInstructionsToDisplay = 50; + static void Main(string[] args) { Console.WriteLine("X86 Disassembler and Decompiler"); Console.WriteLine("--------------------------------"); - - Console.WriteLine($"Loading file: {DllPath}"); - - // Load the DLL file - byte[] binaryData = File.ReadAllBytes(DllPath); - - Console.WriteLine($"Successfully loaded {DllPath}"); - Console.WriteLine($"File size: {binaryData.Length} bytes"); - - // Create the PE format parser - PEFormat peFile = new PEFormat(binaryData); - - // Parse the PE format - Console.WriteLine("\nParsing PE format..."); - if (!peFile.Parse()) - { - Console.WriteLine("Failed to parse PE file. Exiting."); - return; - } - - // Display basic PE information - DisplayPEInfo(peFile); - - // Display exported functions - DisplayExportedFunctions(peFile); - - // Display imported functions - DisplayImportedFunctions(peFile); - - // Find code sections for disassembly - var codeSections = peFile.GetCodeSections(); - Console.WriteLine($"\nFound {codeSections.Count} code section(s):"); - - foreach (int sectionIndex in codeSections) - { - var section = peFile.SectionHeaders[sectionIndex]; - Console.WriteLine($" - {section.Name}: Size={section.SizeOfRawData} bytes, RVA=0x{section.VirtualAddress:X8}"); - - // Get the section data for disassembly - byte[] sectionData = peFile.GetSectionData(sectionIndex); - - // TODO: Implement disassembling logic here - // This is where we would pass the section data to our disassembler - } - - Console.WriteLine("\nPress any key to exit..."); - Console.ReadKey(); - } - private static void DisplayPEInfo(PEFormat peFile) - { - Console.WriteLine("\nPE File Information:"); - Console.WriteLine($"Architecture: {(peFile.Is64Bit ? "64-bit" : "32-bit")}"); - Console.WriteLine($"Entry Point: 0x{peFile.OptionalHeader.AddressOfEntryPoint:X8}"); - Console.WriteLine($"Image Base: 0x{peFile.OptionalHeader.ImageBase:X}"); - Console.WriteLine($"Number of Sections: {peFile.FileHeader.NumberOfSections}"); - - // Display section information - Console.WriteLine("\nSections:"); - for (int i = 0; i < peFile.SectionHeaders.Count; i++) - { - var section = peFile.SectionHeaders[i]; - string flags = ""; - - if ((section.Characteristics & 0x00000020) != 0) flags += "Code "; // IMAGE_SCN_CNT_CODE - if ((section.Characteristics & 0x20000000) != 0) flags += "Exec "; // IMAGE_SCN_MEM_EXECUTE - if ((section.Characteristics & 0x40000000) != 0) flags += "Read "; // IMAGE_SCN_MEM_READ - if ((section.Characteristics & 0x80000000) != 0) flags += "Write"; // IMAGE_SCN_MEM_WRITE - - Console.WriteLine($" {i}: {section.Name,-8} VA=0x{section.VirtualAddress:X8} Size={section.SizeOfRawData,-8} [{flags}]"); - } - } + string filePath = DllPath; - private static void DisplayExportedFunctions(PEFormat peFile) - { - if (peFile.ExportDirectory == null) - { - Console.WriteLine("\nNo exported functions found."); - return; - } - - Console.WriteLine("\nExported Functions:"); - Console.WriteLine($"DLL Name: {peFile.ExportDirectory.DllName}"); - Console.WriteLine($"Number of Functions: {peFile.ExportDirectory.NumberOfFunctions}"); - Console.WriteLine($"Number of Names: {peFile.ExportDirectory.NumberOfNames}"); - - // Display all exported functions - for (int i = 0; i < peFile.ExportedFunctions.Count; i++) - { - var function = peFile.ExportedFunctions[i]; - Console.WriteLine($" {i}: {function.Name} (Ordinal={function.Ordinal}, RVA=0x{function.Address:X8})"); - } - } + Console.WriteLine($"Loading file: {filePath}"); - private static void DisplayImportedFunctions(PEFormat peFile) - { - if (peFile.ImportDescriptors.Count == 0) + try { - Console.WriteLine("\nNo imported functions found."); - return; - } + // Load the file into memory + byte[] fileBytes = File.ReadAllBytes(filePath); + Console.WriteLine($"Successfully loaded {filePath}"); + Console.WriteLine($"File size: {fileBytes.Length} bytes"); + Console.WriteLine(); - Console.WriteLine("\nImported Functions:"); - Console.WriteLine($"Number of Imported DLLs: {peFile.ImportDescriptors.Count}"); + Console.WriteLine("Parsing PE format..."); + Console.WriteLine(); - // Display all imported DLLs and their functions - for (int i = 0; i < peFile.ImportDescriptors.Count; i++) - { - var descriptor = peFile.ImportDescriptors[i]; - Console.WriteLine($" DLL: {descriptor.DllName}"); - - // Display all functions from this DLL - for (int j = 0; j < descriptor.Functions.Count; j++) + // Parse the PE format + PEFormat peFormat = new PEFormat(fileBytes); + if (!peFormat.Parse()) { - var function = descriptor.Functions[j]; - if (function.IsOrdinal) + Console.WriteLine("Failed to parse PE file."); + return; + } + + // Display PE information + DisplayPEInfo(peFormat); + + // Disassemble code sections + DisassembleCodeSections(peFormat); + + Console.WriteLine(); + Console.WriteLine("Press any key to exit..."); + Console.ReadKey(); + } + catch (Exception ex) + { + Console.WriteLine($"Error: {ex.Message}"); + Console.WriteLine(ex.StackTrace); + } + } + + /// + /// Displays information about the PE file + /// + /// The PE format object + private static void DisplayPEInfo(PEFormat peFormat) + { + Console.WriteLine("PE File Information:"); + Console.WriteLine($"Architecture: {(peFormat.OptionalHeader.Is64Bit() ? "64-bit" : "32-bit")}"); + Console.WriteLine($"Entry Point: 0x{peFormat.OptionalHeader.AddressOfEntryPoint:X8}"); + Console.WriteLine($"Image Base: 0x{peFormat.OptionalHeader.ImageBase:X8}"); + Console.WriteLine($"Number of Sections: {peFormat.FileHeader.NumberOfSections}"); + + Console.WriteLine("\nSections:"); + for (int i = 0; i < peFormat.SectionHeaders.Count; i++) + { + var section = peFormat.SectionHeaders[i]; + string flags = ""; + + // Use the section's methods to determine characteristics + if (section.ContainsCode()) flags += "Code "; + if (section.IsExecutable()) flags += "Exec "; + if (section.IsReadable()) flags += "Read "; + if (section.IsWritable()) flags += "Write"; + + Console.WriteLine($" {i}: {section.Name,-8} VA=0x{section.VirtualAddress:X8} Size={section.VirtualSize,-8} [{flags}]"); + } + + // Display exported functions + if (peFormat.ExportDirectory != null) + { + Console.WriteLine("\nExported Functions:"); + Console.WriteLine($"DLL Name: {peFormat.ExportDirectory.Name}"); + Console.WriteLine($"Number of Functions: {peFormat.ExportDirectory.NumberOfFunctions}"); + Console.WriteLine($"Number of Names: {peFormat.ExportDirectory.NumberOfNames}"); + + for (int i = 0; i < peFormat.ExportedFunctions.Count; i++) + { + var function = peFormat.ExportedFunctions[i]; + Console.WriteLine($" {i}: {function.Name} (Ordinal={function.Ordinal}, RVA=0x{function.Address:X8})"); + } + } + + // Display imported functions + if (peFormat.ImportDescriptors.Count > 0) + { + Console.WriteLine("\nImported Functions:"); + Console.WriteLine($"Number of Imported DLLs: {peFormat.ImportDescriptors.Count}"); + + for (int i = 0; i < peFormat.ImportDescriptors.Count; i++) + { + var descriptor = peFormat.ImportDescriptors[i]; + Console.WriteLine($" DLL: {descriptor.Name}"); + + for (int j = 0; j < descriptor.Functions.Count; j++) { - Console.WriteLine($" {j}: Ordinal {function.Ordinal}"); + var function = descriptor.Functions[j]; + if (function.IsOrdinal) + { + Console.WriteLine($" {j}: Ordinal {function.Ordinal}"); + } + else + { + Console.WriteLine($" {j}: {function.Name} (Hint={function.Hint})"); + } } - else + + if (i < peFormat.ImportDescriptors.Count - 1) { - Console.WriteLine($" {j}: {function.Name} (Hint={function.Hint})"); + Console.WriteLine(); // Add a blank line between DLLs for better readability } } + } + } + + /// + /// Disassembles the code sections of the PE file + /// + /// The PE format object + private static void DisassembleCodeSections(PEFormat peFormat) + { + // Find code sections + var codeSections = peFormat.SectionHeaders.FindAll(s => s.ContainsCode()); + + Console.WriteLine($"\nFound {codeSections.Count} code section(s):"); + foreach (var section in codeSections) + { + Console.WriteLine($" - {section.Name}: Size={section.VirtualSize} bytes, RVA=0x{section.VirtualAddress:X8}"); + } + Console.WriteLine(); + + // Disassemble each code section + for (int i = 0; i < peFormat.SectionHeaders.Count; i++) + { + var section = peFormat.SectionHeaders[i]; + + // Skip non-code sections + if (!section.ContainsCode()) + continue; - if (i < peFile.ImportDescriptors.Count - 1) + Console.WriteLine($"Disassembling section {section.Name} at RVA 0x{section.VirtualAddress:X8}:"); + + // Get section data using the section index + byte[] sectionData = peFormat.GetSectionData(i); + + // Create a disassembler for this section + ulong baseAddress = peFormat.OptionalHeader.ImageBase + section.VirtualAddress; + Disassembler disassembler = new Disassembler(sectionData, baseAddress); + + // Disassemble and display instructions + int count = 0; + int maxInstructions = MaxInstructionsToDisplay; // Use the constant + + while (count < maxInstructions) { - Console.WriteLine(); // Add a blank line between DLLs for better readability + Instruction? instruction = disassembler.DisassembleNext(); + if (instruction == null) + { + break; + } + + // Format the instruction bytes + StringBuilder bytesStr = new StringBuilder(); + foreach (byte b in instruction.Bytes) + { + bytesStr.Append($"{b:X2} "); + } + + // Format the instruction + // Calculate the RVA by subtracting the image base + ulong rva = instruction.Address - peFormat.OptionalHeader.ImageBase; + string addressStr = $"{rva:X8}"; + string bytesDisplay = bytesStr.ToString().PadRight(20); // Pad to 20 characters + string operandsStr = string.IsNullOrEmpty(instruction.Operands) ? "" : $" {instruction.Operands}"; + + Console.WriteLine($" {addressStr} {bytesDisplay} {instruction.Mnemonic}{operandsStr}"); + + count++; + } + + if (sectionData.Length > count * 10) // If we've only shown a small portion + { + Console.WriteLine($" ... ({sectionData.Length - (count * 10)} more bytes not shown)"); } } } diff --git a/X86Disassembler/X86/Disassembler.cs b/X86Disassembler/X86/Disassembler.cs new file mode 100644 index 0000000..5cf3c3f --- /dev/null +++ b/X86Disassembler/X86/Disassembler.cs @@ -0,0 +1,86 @@ +using System.Text; + +namespace X86Disassembler.X86; + +/// +/// Core x86 instruction disassembler +/// +public class Disassembler +{ + // Buffer containing the code to disassemble + private readonly byte[] _codeBuffer; + + // Base address for the code (RVA) + private readonly ulong _baseAddress; + + // Current position in the code buffer + private int _position; + + // Instruction decoder + private readonly InstructionDecoder _decoder; + + /// + /// Initializes a new instance of the Disassembler class + /// + /// The buffer containing the code to disassemble + /// The base address (RVA) of the code + public Disassembler(byte[] codeBuffer, ulong baseAddress) + { + _codeBuffer = codeBuffer; + _baseAddress = baseAddress; + _position = 0; + _decoder = new InstructionDecoder(codeBuffer); + } + + /// + /// Disassembles the next instruction in the code buffer + /// + /// The disassembled instruction, or null if the end of the buffer is reached + public Instruction? DisassembleNext() + { + if (_position >= _codeBuffer.Length) + { + return null; // End of buffer reached + } + + // Create a new instruction + Instruction instruction = new Instruction + { + Address = _baseAddress + (uint)_position + }; + + // Decode the instruction + int bytesRead = _decoder.DecodeAt(_position, instruction); + + if (bytesRead == 0) + { + return null; // Failed to decode instruction + } + + // Update position + _position += bytesRead; + + return instruction; + } + + /// + /// Disassembles all instructions in the code buffer + /// + /// A list of disassembled instructions + public List DisassembleAll() + { + List instructions = new List(); + + // Reset position + _position = 0; + + // Disassemble all instructions + Instruction? instruction; + while ((instruction = DisassembleNext()) != null) + { + instructions.Add(instruction); + } + + return instructions; + } +} diff --git a/X86Disassembler/X86/Instruction.cs b/X86Disassembler/X86/Instruction.cs new file mode 100644 index 0000000..c744b0d --- /dev/null +++ b/X86Disassembler/X86/Instruction.cs @@ -0,0 +1,50 @@ +namespace X86Disassembler.X86; + +/// +/// Represents a decoded x86 instruction +/// +public class Instruction +{ + /// + /// The address of the instruction in memory + /// + public ulong Address { get; set; } + + /// + /// The raw bytes of the instruction + /// + public byte[] Bytes { get; set; } = Array.Empty(); + + /// + /// The mnemonic of the instruction (e.g., "mov", "add", "jmp") + /// + public string Mnemonic { get; set; } = string.Empty; + + /// + /// The operands of the instruction as a formatted string + /// + public string Operands { get; set; } = string.Empty; + + /// + /// The length of the instruction in bytes + /// + public int Length => Bytes.Length; + + /// + /// Returns a string representation of the instruction + /// + /// A formatted string representing the instruction + public override string ToString() + { + return $"{Address:X8} {BytesToString()} {Mnemonic} {Operands}".Trim(); + } + + /// + /// Converts the instruction bytes to a formatted hex string + /// + /// A formatted hex string of the instruction bytes + private string BytesToString() + { + return string.Join(" ", Bytes.Select(b => b.ToString("X2"))); + } +} diff --git a/X86Disassembler/X86/InstructionDecoder.cs b/X86Disassembler/X86/InstructionDecoder.cs new file mode 100644 index 0000000..b85f95e --- /dev/null +++ b/X86Disassembler/X86/InstructionDecoder.cs @@ -0,0 +1,504 @@ +namespace X86Disassembler.X86; + +/// +/// Decoder for x86 instructions +/// +public class InstructionDecoder +{ + // Instruction prefixes + private const byte PREFIX_LOCK = 0xF0; + private const byte PREFIX_REPNE = 0xF2; + private const byte PREFIX_REP = 0xF3; + private const byte PREFIX_CS = 0x2E; + private const byte PREFIX_SS = 0x36; + private const byte PREFIX_DS = 0x3E; + private const byte PREFIX_ES = 0x26; + private const byte PREFIX_FS = 0x64; + private const byte PREFIX_GS = 0x65; + private const byte PREFIX_OPERAND_SIZE = 0x66; + private const byte PREFIX_ADDRESS_SIZE = 0x67; + + // Common opcodes + private const byte OPCODE_INT3 = 0xCC; + private const byte OPCODE_NOP = 0x90; + private const byte OPCODE_RET = 0xC3; + private const byte OPCODE_CALL_NEAR_RELATIVE = 0xE8; + private const byte OPCODE_JMP_NEAR_RELATIVE = 0xE9; + private const byte OPCODE_JMP_SHORT_RELATIVE = 0xEB; + + // Opcode groups + private const byte OPCODE_GROUP_1_BYTE = 0x80; + private const byte OPCODE_GROUP_1_WORD_DWORD = 0x81; + private const byte OPCODE_GROUP_1_BYTE_IMM8 = 0x83; + + // ModR/M byte masks + private const byte MODRM_MOD_MASK = 0xC0; // 11000000b + private const byte MODRM_REG_MASK = 0x38; // 00111000b + private const byte MODRM_RM_MASK = 0x07; // 00000111b + + // SIB byte masks + private const byte SIB_SCALE_MASK = 0xC0; // 11000000b + private const byte SIB_INDEX_MASK = 0x38; // 00111000b + private const byte SIB_BASE_MASK = 0x07; // 00000111b + + // Register names + private static readonly string[] RegisterNames8 = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh" }; + private static readonly string[] RegisterNames16 = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di" }; + private static readonly string[] RegisterNames32 = { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi" }; + private static readonly string[] SegmentRegisterNames = { "es", "cs", "ss", "ds", "fs", "gs" }; + + // Condition codes for conditional jumps + private static readonly string[] ConditionCodes = { + "o", "no", "b", "ae", "e", "ne", "be", "a", + "s", "ns", "p", "np", "l", "ge", "le", "g" + }; + + // One-byte opcode map + private static readonly string[] OneByteOpcodes = new string[256]; + + // Buffer containing the code to decode + private readonly byte[] _codeBuffer; + + // Current position in the code buffer + private int _position; + + // Length of the buffer + private readonly int _length; + + /// + /// Static constructor to initialize the opcode maps + /// + static InstructionDecoder() + { + InitializeOpcodeMaps(); + } + + /// + /// Initializes the opcode maps + /// + private static void InitializeOpcodeMaps() + { + // Initialize all entries to "??" (unknown) + for (int i = 0; i < 256; i++) + { + OneByteOpcodes[i] = "??"; + } + + // Data transfer instructions + for (int i = 0x88; i <= 0x8B; i++) + { + OneByteOpcodes[i] = "mov"; + } + OneByteOpcodes[0xA0] = "mov"; // MOV AL, moffs8 + OneByteOpcodes[0xA1] = "mov"; // MOV EAX, moffs32 + OneByteOpcodes[0xA2] = "mov"; // MOV moffs8, AL + OneByteOpcodes[0xA3] = "mov"; // MOV moffs32, EAX + for (int i = 0xB0; i <= 0xB7; i++) + { + OneByteOpcodes[i] = "mov"; // MOV r8, imm8 + } + for (int i = 0xB8; i <= 0xBF; i++) + { + OneByteOpcodes[i] = "mov"; // MOV r32, imm32 + } + OneByteOpcodes[0xC6] = "mov"; // MOV r/m8, imm8 + OneByteOpcodes[0xC7] = "mov"; // MOV r/m32, imm32 + + // Push/Pop instructions + for (int i = 0x50; i <= 0x57; i++) + { + OneByteOpcodes[i] = "push"; // PUSH r32 + } + for (int i = 0x58; i <= 0x5F; i++) + { + OneByteOpcodes[i] = "pop"; // POP r32 + } + OneByteOpcodes[0x68] = "push"; // PUSH imm32 + OneByteOpcodes[0x6A] = "push"; // PUSH imm8 + OneByteOpcodes[0x8F] = "pop"; // POP r/m32 + OneByteOpcodes[0x9C] = "pushf"; // PUSHF + OneByteOpcodes[0x9D] = "popf"; // POPF + + // Arithmetic instructions + for (int i = 0x00; i <= 0x05; i++) + { + OneByteOpcodes[i] = "add"; + } + for (int i = 0x28; i <= 0x2D; i++) + { + OneByteOpcodes[i] = "sub"; + } + for (int i = 0x30; i <= 0x35; i++) + { + OneByteOpcodes[i] = "xor"; + } + for (int i = 0x38; i <= 0x3D; i++) + { + OneByteOpcodes[i] = "cmp"; + } + OneByteOpcodes[0x40] = "inc"; // INC eax + OneByteOpcodes[0x41] = "inc"; // INC ecx + OneByteOpcodes[0x42] = "inc"; // INC edx + OneByteOpcodes[0x43] = "inc"; // INC ebx + OneByteOpcodes[0x44] = "inc"; // INC esp + OneByteOpcodes[0x45] = "inc"; // INC ebp + OneByteOpcodes[0x46] = "inc"; // INC esi + OneByteOpcodes[0x47] = "inc"; // INC edi + OneByteOpcodes[0x48] = "dec"; // DEC eax + OneByteOpcodes[0x49] = "dec"; // DEC ecx + OneByteOpcodes[0x4A] = "dec"; // DEC edx + OneByteOpcodes[0x4B] = "dec"; // DEC ebx + OneByteOpcodes[0x4C] = "dec"; // DEC esp + OneByteOpcodes[0x4D] = "dec"; // DEC ebp + OneByteOpcodes[0x4E] = "dec"; // DEC esi + OneByteOpcodes[0x4F] = "dec"; // DEC edi + + // Logical instructions + for (int i = 0x20; i <= 0x25; i++) + { + OneByteOpcodes[i] = "and"; + } + for (int i = 0x08; i <= 0x0D; i++) + { + OneByteOpcodes[i] = "or"; + } + OneByteOpcodes[0xF7] = "not"; // Group 3 - NOT, NEG, MUL, IMUL, DIV, IDIV + + // Shift and rotate instructions + OneByteOpcodes[0xD0] = "rol"; // Group 2 - ROL, ROR, RCL, RCR, SHL/SAL, SHR, SAR + OneByteOpcodes[0xD1] = "rol"; // Group 2 - ROL, ROR, RCL, RCR, SHL/SAL, SHR, SAR + OneByteOpcodes[0xD2] = "rol"; // Group 2 - ROL, ROR, RCL, RCR, SHL/SAL, SHR, SAR + OneByteOpcodes[0xD3] = "rol"; // Group 2 - ROL, ROR, RCL, RCR, SHL/SAL, SHR, SAR + + // Control flow instructions + OneByteOpcodes[0xC3] = "ret"; + OneByteOpcodes[0xC2] = "ret"; + OneByteOpcodes[0xCA] = "retf"; + OneByteOpcodes[0xCB] = "retf"; + OneByteOpcodes[0xCC] = "int3"; + OneByteOpcodes[0xCD] = "int"; + OneByteOpcodes[0xCE] = "into"; + OneByteOpcodes[0xCF] = "iret"; + OneByteOpcodes[0xE8] = "call"; + OneByteOpcodes[0xE9] = "jmp"; + OneByteOpcodes[0xEB] = "jmp"; + OneByteOpcodes[0xFF] = "call"; // Group 5 - CALL, JMP, PUSH + + // Conditional jumps + for (int i = 0x70; i <= 0x7F; i++) + { + OneByteOpcodes[i] = "j" + ConditionCodes[i - 0x70]; + } + + // String instructions + OneByteOpcodes[0xA4] = "movsb"; + OneByteOpcodes[0xA5] = "movsd"; + OneByteOpcodes[0xA6] = "cmpsb"; + OneByteOpcodes[0xA7] = "cmpsd"; + OneByteOpcodes[0xAA] = "stosb"; + OneByteOpcodes[0xAB] = "stosd"; + OneByteOpcodes[0xAC] = "lodsb"; + OneByteOpcodes[0xAD] = "lodsd"; + OneByteOpcodes[0xAE] = "scasb"; + OneByteOpcodes[0xAF] = "scasd"; + + // Misc instructions + OneByteOpcodes[0x90] = "nop"; + OneByteOpcodes[0x91] = "xchg"; // XCHG eax, ecx + OneByteOpcodes[0x92] = "xchg"; // XCHG eax, edx + OneByteOpcodes[0x93] = "xchg"; // XCHG eax, ebx + OneByteOpcodes[0x94] = "xchg"; // XCHG eax, esp + OneByteOpcodes[0x95] = "xchg"; // XCHG eax, ebp + OneByteOpcodes[0x96] = "xchg"; // XCHG eax, esi + OneByteOpcodes[0x97] = "xchg"; // XCHG eax, edi + OneByteOpcodes[0x98] = "cwde"; + OneByteOpcodes[0x99] = "cdq"; + OneByteOpcodes[0xF4] = "hlt"; + OneByteOpcodes[0xF5] = "cmc"; + OneByteOpcodes[0xF8] = "clc"; + OneByteOpcodes[0xF9] = "stc"; + OneByteOpcodes[0xFA] = "cli"; + OneByteOpcodes[0xFB] = "sti"; + OneByteOpcodes[0xFC] = "cld"; + OneByteOpcodes[0xFD] = "std"; + } + + /// + /// Initializes a new instance of the InstructionDecoder class + /// + /// The buffer containing the code to decode + public InstructionDecoder(byte[] codeBuffer) + { + _codeBuffer = codeBuffer; + _position = 0; + _length = codeBuffer.Length; + } + + /// + /// Decodes an instruction at the specified position in the code buffer + /// + /// The position in the code buffer + /// The instruction object to populate + /// The number of bytes read + public int DecodeAt(int position, Instruction instruction) + { + _position = position; + return Decode(instruction); + } + + /// + /// Decodes an instruction at the current position in the code buffer + /// + /// The instruction object to populate + /// The number of bytes read + public int Decode(Instruction instruction) + { + // Store the starting position + int startPosition = _position; + + // Check if we've reached the end of the buffer + if (_position >= _length) + { + return 0; + } + + // Handle instruction prefixes + bool hasPrefix = true; + bool operandSizePrefix = false; + bool addressSizePrefix = false; + string segmentOverride = string.Empty; + + while (hasPrefix && _position < _length) + { + byte prefix = _codeBuffer[_position]; + + switch (prefix) + { + case PREFIX_LOCK: + case PREFIX_REPNE: + case PREFIX_REP: + _position++; + break; + + case PREFIX_CS: + segmentOverride = "cs"; + _position++; + break; + + case PREFIX_SS: + segmentOverride = "ss"; + _position++; + break; + + case PREFIX_DS: + segmentOverride = "ds"; + _position++; + break; + + case PREFIX_ES: + segmentOverride = "es"; + _position++; + break; + + case PREFIX_FS: + segmentOverride = "fs"; + _position++; + break; + + case PREFIX_GS: + segmentOverride = "gs"; + _position++; + break; + + case PREFIX_OPERAND_SIZE: + operandSizePrefix = true; + _position++; + break; + + case PREFIX_ADDRESS_SIZE: + addressSizePrefix = true; + _position++; + break; + + default: + hasPrefix = false; + break; + } + } + + // We've reached the end of the buffer after processing prefixes + if (_position >= _length) + { + return _position - startPosition; + } + + // Read the opcode + byte opcode = _codeBuffer[_position++]; + + // Get the mnemonic from the opcode map + string mnemonic = OneByteOpcodes[opcode]; + + // Handle specific opcodes + string operands = string.Empty; + + switch (opcode) + { + case OPCODE_INT3: + // No operands for INT3 + break; + + case OPCODE_NOP: + // No operands for NOP + break; + + case OPCODE_RET: + // No operands for RET + break; + + case OPCODE_CALL_NEAR_RELATIVE: + if (_position + 4 <= _length) + { + // Read 32-bit relative offset + int offset = BitConverter.ToInt32(_codeBuffer, _position); + _position += 4; + + // Calculate target address (relative to next instruction) + uint targetAddress = (uint)(_position + offset); + operands = $"0x{targetAddress:X8}"; + } + break; + + case OPCODE_JMP_NEAR_RELATIVE: + if (_position + 4 <= _length) + { + // Read 32-bit relative offset + int offset = BitConverter.ToInt32(_codeBuffer, _position); + _position += 4; + + // Calculate target address (relative to next instruction) + uint targetAddress = (uint)(_position + offset); + operands = $"0x{targetAddress:X8}"; + } + break; + + case OPCODE_JMP_SHORT_RELATIVE: + if (_position < _length) + { + // Read 8-bit relative offset + sbyte offset = (sbyte)_codeBuffer[_position++]; + + // Calculate target address (relative to next instruction) + uint targetAddress = (uint)(_position + offset); + operands = $"0x{targetAddress:X8}"; + } + break; + + default: + // Handle register-based instructions + if (opcode >= 0x40 && opcode <= 0x47) // INC r32 + { + int reg = opcode - 0x40; + operands = RegisterNames32[reg]; + } + else if (opcode >= 0x48 && opcode <= 0x4F) // DEC r32 + { + int reg = opcode - 0x48; + operands = RegisterNames32[reg]; + } + else if (opcode >= 0x50 && opcode <= 0x57) // PUSH r32 + { + int reg = opcode - 0x50; + operands = RegisterNames32[reg]; + } + else if (opcode >= 0x58 && opcode <= 0x5F) // POP r32 + { + int reg = opcode - 0x58; + operands = RegisterNames32[reg]; + } + else if (opcode >= 0x91 && opcode <= 0x97) // XCHG eax, r32 + { + int reg = opcode - 0x90; + operands = $"eax, {RegisterNames32[reg]}"; + } + else if (opcode >= 0xB0 && opcode <= 0xB7) // MOV r8, imm8 + { + if (_position < _length) + { + int reg = opcode - 0xB0; + byte imm8 = _codeBuffer[_position++]; + operands = $"{RegisterNames8[reg]}, 0x{imm8:X2}"; + } + } + else if (opcode >= 0xB8 && opcode <= 0xBF) // MOV r32, imm32 + { + if (_position + 4 <= _length) + { + int reg = opcode - 0xB8; + uint imm32 = BitConverter.ToUInt32(_codeBuffer, _position); + _position += 4; + operands = $"{RegisterNames32[reg]}, 0x{imm32:X8}"; + } + } + else if (opcode >= 0x70 && opcode <= 0x7F) // Conditional jumps (short) + { + if (_position < _length) + { + sbyte offset = (sbyte)_codeBuffer[_position++]; + uint targetAddress = (uint)(_position + offset); + operands = $"0x{targetAddress:X8}"; + } + } + else if (opcode == 0x68) // PUSH imm32 + { + if (_position + 4 <= _length) + { + uint imm32 = BitConverter.ToUInt32(_codeBuffer, _position); + _position += 4; + operands = $"0x{imm32:X8}"; + } + } + else if (opcode == 0x6A) // PUSH imm8 + { + if (_position < _length) + { + byte imm8 = _codeBuffer[_position++]; + operands = $"0x{imm8:X2}"; + } + } + else if (opcode == 0xCD) // INT imm8 + { + if (_position < _length) + { + byte imm8 = _codeBuffer[_position++]; + operands = $"0x{imm8:X2}"; + } + } + else if (opcode == 0xE3) // JECXZ rel8 + { + if (_position < _length) + { + sbyte offset = (sbyte)_codeBuffer[_position++]; + uint targetAddress = (uint)(_position + offset); + operands = $"0x{targetAddress:X8}"; + } + } + else + { + // For other opcodes, we'll just show the raw bytes for now + // In a full implementation, we would decode the ModR/M byte, SIB byte, etc. + } + break; + } + + // Set the instruction properties + instruction.Mnemonic = mnemonic; + instruction.Operands = operands; + + // Copy the instruction bytes + int bytesRead = _position - startPosition; + instruction.Bytes = new byte[bytesRead]; + Array.Copy(_codeBuffer, startPosition, instruction.Bytes, 0, bytesRead); + + return bytesRead; + } +} diff --git a/X86Disassembler/X86/InstructionType.cs b/X86Disassembler/X86/InstructionType.cs new file mode 100644 index 0000000..cc1aed3 --- /dev/null +++ b/X86Disassembler/X86/InstructionType.cs @@ -0,0 +1,72 @@ +namespace X86Disassembler.X86; + +/// +/// Represents the different types of x86 instructions +/// +public enum InstructionType +{ + /// + /// Unknown or unrecognized instruction + /// + Unknown, + + /// + /// Data transfer instructions (e.g., MOV, PUSH, POP, XCHG) + /// + DataTransfer, + + /// + /// Arithmetic instructions (e.g., ADD, SUB, MUL, DIV) + /// + Arithmetic, + + /// + /// Logical instructions (e.g., AND, OR, XOR, NOT) + /// + Logical, + + /// + /// Shift and rotate instructions (e.g., SHL, SHR, ROL, ROR) + /// + ShiftRotate, + + /// + /// Control flow instructions (e.g., JMP, CALL, RET) + /// + ControlFlow, + + /// + /// Conditional jump instructions (e.g., JE, JNE, JG, JL) + /// + ConditionalJump, + + /// + /// String instructions (e.g., MOVS, CMPS, SCAS) + /// + String, + + /// + /// I/O instructions (e.g., IN, OUT) + /// + IO, + + /// + /// Flag control instructions (e.g., STC, CLC, CMC) + /// + FlagControl, + + /// + /// Processor control instructions (e.g., HLT, WAIT) + /// + ProcessorControl, + + /// + /// Floating-point instructions (e.g., FADD, FSUB, FMUL) + /// + FloatingPoint, + + /// + /// SIMD instructions (e.g., MMX, SSE, AVX) + /// + SIMD +}