0
mirror of https://github.com/sampletext32/ParkanPlayground.git synced 2025-07-01 04:40:25 +03:00
This commit is contained in:
bird_egop
2025-04-18 16:29:53 +03:00
parent 23fb497e0a
commit 7eead316cd
8 changed files with 296 additions and 366 deletions

View File

@ -0,0 +1,13 @@
namespace X86Disassembler.Analysers;
/// <summary>
/// A disassembled function: an address together with the instruction blocks found for it.
/// </summary>
public class AsmFunction
{
    /// <summary>
    /// The address of the function; printed as 8-digit hexadecimal by <see cref="ToString"/>
    /// </summary>
    public ulong Address { get; set; }

    /// <summary>
    /// The instruction blocks that belong to the function.
    /// Starts out as an empty list so <see cref="ToString"/> is safe on a freshly created instance.
    /// </summary>
    public List<InstructionBlock> Blocks { get; set; } = [];

    /// <summary>
    /// Renders a header line with the address followed by every block,
    /// each block prefixed with a tab and separated by a line break
    /// </summary>
    public override string ToString()
    {
        return $"Function at {Address:X8}\n{string.Join("\n", Blocks.Select(x => $"\t{x}"))}";
    }
}

View File

@ -0,0 +1,197 @@
using X86Disassembler.X86;
namespace X86Disassembler.Analysers;
/// <summary>
/// Disassembles code into basic blocks by following control flow instructions.
/// A basic block is a sequence of instructions with a single entry point (the first instruction)
/// and a single exit point (the last instruction, typically a jump or return).
/// </summary>
public class BlockDisassembler
{
    // The buffer containing the code to disassemble
    private readonly byte[] _codeBuffer;

    // The length of the buffer
    private readonly int _length;

    // The base address of the code
    private readonly ulong _baseAddress;

    /// <summary>
    /// Initializes a new instance of the BlockDisassembler class
    /// </summary>
    /// <param name="codeBuffer">The raw code bytes to be disassembled</param>
    /// <param name="baseAddress">The base RVA (Relative Virtual Address) of the code section</param>
    public BlockDisassembler(byte[] codeBuffer, ulong baseAddress)
    {
        _codeBuffer = codeBuffer;
        _length = codeBuffer.Length;
        _baseAddress = baseAddress;
    }

    /// <summary>
    /// Disassembles code starting from the specified RVA address by following control flow.
    /// Creates blocks of instructions separated by jumps, branches, and returns.
    /// </summary>
    /// <param name="rvaAddress">The RVA (Relative Virtual Address) to start disassembly from</param>
    /// <returns>
    /// A function whose Address is <paramref name="rvaAddress"/> and whose blocks are sorted by
    /// their start offset inside the code buffer
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// The start address does not fall inside the code buffer
    /// </exception>
    /// <exception cref="InvalidOperationException">
    /// The decoder could not decode an instruction on a followed path
    /// </exception>
    public AsmFunction DisassembleFromAddress(uint rvaAddress)
    {
        // Validate on the unsigned 64-bit values: a start address below the base would wrap around
        // in the subtraction and the later cast to int would silently turn it into a bogus offset
        if (rvaAddress < _baseAddress || rvaAddress - _baseAddress >= (ulong) _length)
        {
            throw new ArgumentOutOfRangeException(
                nameof(rvaAddress),
                $"Start address 0x{rvaAddress:X8} is outside the code buffer at base 0x{_baseAddress:X8} with size {_length}");
        }

        // Create instruction decoder for parsing the code buffer
        InstructionDecoder decoder = new InstructionDecoder(_codeBuffer, _length);

        // Track visited addresses to prevent infinite loops
        HashSet<ulong> visitedAddresses = [];

        // Queue of buffer offsets to process (breadth-first approach)
        Queue<ulong> addressQueue = [];

        // Calculate the buffer offset from the RVA by subtracting the base address
        addressQueue.Enqueue(rvaAddress - _baseAddress);

        // List to store discovered basic blocks
        List<InstructionBlock> blocks = [];

        // Start offsets of the registered blocks; gives a constant-time answer to
        // "does a block start here?" which is asked before every decoded instruction
        HashSet<ulong> blockStarts = [];

        // Registers a finished block and remembers where it starts
        void CloseBlock(ulong start, List<Instruction> body)
        {
            RegisterBlock(blocks, start, body);
            blockStarts.Add(start);
        }

        while (addressQueue.Count > 0)
        {
            // Get the next offset to process
            var address = addressQueue.Dequeue();

            // Skip if we've already visited this offset
            if (!visitedAddresses.Add(address))
            {
                Console.WriteLine($"Already visited address {address}");
                continue;
            }

            // Position the decoder at the current offset
            decoder.SetPosition((int) address);

            // Collect instructions for this block
            List<Instruction> instructions = [];

            // Process instructions until we hit a control flow change
            while (true)
            {
                // Remember where the next instruction starts (used for the lookup and for error reporting)
                var position = decoder.GetPosition();

                // If we've stepped onto an existing block, close the current block here
                // and stop processing this path (to avoid duplicating instructions)
                if (blockStarts.Contains((ulong) position))
                {
                    Console.WriteLine("Stepped on to existing block. Creating in the middle");
                    CloseBlock(address, instructions);
                    break;
                }

                // Decode the next instruction
                var instruction = decoder.DecodeInstruction();

                // Handle decoding failures; report the failing offset, not just the block start
                if (instruction is null)
                {
                    throw new InvalidOperationException(
                        $"Unexpectedly failed to decode instruction at offset 0x{position:X8} (block started at 0x{address:X8})");
                }

                // Add the instruction to the current block
                instructions.Add(instruction);

                // Conditional jump (e.g., JZ, JNZ, JLE): follow both the jump target and the fall-through path
                if (instruction.Type.IsConditionalJump())
                {
                    CloseBlock(address, instructions);
                    EnqueueJumpTarget(addressQueue, instruction);

                    // Queue the fall-through offset (next instruction after this jump)
                    addressQueue.Enqueue((uint) decoder.GetPosition());
                    break;
                }

                // Unconditional jump (e.g., JMP): only the jump target is followed
                if (instruction.Type.IsRegularJump())
                {
                    CloseBlock(address, instructions);
                    EnqueueJumpTarget(addressQueue, instruction);
                    break;
                }

                // Return (e.g., RET, RETF): ends the block without any successors
                if (instruction.Type.IsRet())
                {
                    CloseBlock(address, instructions);
                    break;
                }
            }
        }

        // Since blocks aren't necessarily ordered (ASM can jump anywhere it likes)
        // we need to sort the blocks ourselves
        blocks.Sort((b1, b2) => b1.Address.CompareTo(b2.Address));

        return new AsmFunction()
        {
            Address = rvaAddress,
            Blocks = blocks,
        };
    }

    /// <summary>
    /// Creates and registers a new instruction block in the blocks collection
    /// </summary>
    /// <param name="blocks">The list of blocks to add to</param>
    /// <param name="address">The starting address of the block</param>
    /// <param name="instructions">The instructions contained in the block</param>
    public void RegisterBlock(List<InstructionBlock> blocks, ulong address, List<Instruction> instructions)
    {
        // Create a new block with the provided address and instructions
        var block = new InstructionBlock()
        {
            Address = address,
            Instructions = instructions
        };

        // Add the block to the collection
        blocks.Add(block);

        // Log the created block for debugging
        Console.WriteLine($"Created block:\n{block}");
    }

    /// <summary>
    /// Queues the target of a jump instruction, provided the instruction has an operand and
    /// the target lies inside the code buffer. Targets that cannot be followed are logged and dropped.
    /// </summary>
    /// <param name="addressQueue">The queue of buffer offsets still to be processed</param>
    /// <param name="instruction">The jump instruction whose first operand holds the target</param>
    private void EnqueueJumpTarget(Queue<ulong> addressQueue, Instruction instruction)
    {
        var operands = instruction.StructuredOperands;
        if (operands == null || operands.Count == 0)
        {
            Console.WriteLine("Jump has no operands. Target is not followed");
            return;
        }

        // The value is used as an offset into the code buffer, exactly like the queued start offset.
        // NOTE(review): GetValue() yields 0 for operands that are not relative offsets, so a jump
        // through a register or memory operand ends up queuing offset 0 - confirm and handle separately.
        ulong target = operands[0].GetValue();

        if (target >= (ulong) _length)
        {
            Console.WriteLine($"Jump target {target} is outside the code buffer. Target is not followed");
            return;
        }

        addressQueue.Enqueue(target);
    }
}
/// <summary>
/// Represents a basic block of instructions with a single entry and exit point
/// </summary>
public class InstructionBlock
{
    /// <summary>
    /// The starting address of the block
    /// </summary>
    public ulong Address { get; set; }

    /// <summary>
    /// The list of instructions contained in this block.
    /// Starts out as an empty list so <see cref="ToString"/> is safe on a freshly created instance.
    /// </summary>
    public List<Instruction> Instructions { get; set; } = [];

    /// <summary>
    /// Returns a string representation of the block: the address as 8-digit hexadecimal
    /// followed by one line per instruction
    /// </summary>
    public override string ToString()
    {
        return $"Address: {Address:X8}\n{string.Join("\n", Instructions)}";
    }
}

View File

@ -0,0 +1,40 @@
using X86Disassembler.X86;
namespace X86Disassembler.Analysers;
/// <summary>
/// Classification helpers for <see cref="InstructionType"/> used when following control flow
/// </summary>
public static class InstructionTypeExtensions
{
    /// <summary>
    /// Tells whether the instruction type is a conditional jump, i.e. a jump that has both
    /// a jump target and a fall-through path
    /// </summary>
    /// <param name="type">The instruction type to classify</param>
    /// <returns>True for the conditional jump types listed below; otherwise false</returns>
    public static bool IsConditionalJump(this InstructionType type)
    {
        return type switch
        {
            // Signed comparisons
            InstructionType.Jg or InstructionType.Jge or InstructionType.Jl or InstructionType.Jle => true,

            // Unsigned comparisons
            InstructionType.Ja or InstructionType.Jae or InstructionType.Jb or InstructionType.Jbe => true,

            // Single-flag conditions
            InstructionType.Jz or InstructionType.Jnz => true,
            InstructionType.Jo or InstructionType.Jno => true,
            InstructionType.Js or InstructionType.Jns => true,
            InstructionType.Jp or InstructionType.Jnp => true,

            // Counter-register condition; the previous Disassembler.IsConditionalJump treated it
            // as conditional as well, so leaving it out would hide its jump target from the analysis
            InstructionType.Jcxz => true,

            _ => false
        };
    }

    /// <summary>
    /// Tells whether the instruction type is the unconditional jump
    /// </summary>
    /// <param name="type">The instruction type to classify</param>
    /// <returns>True only for <see cref="InstructionType.Jmp"/></returns>
    public static bool IsRegularJump(this InstructionType type)
    {
        return type == InstructionType.Jmp;
    }

    /// <summary>
    /// Tells whether the instruction type is a return
    /// </summary>
    /// <param name="type">The instruction type to classify</param>
    /// <returns>True for <see cref="InstructionType.Ret"/> and <see cref="InstructionType.Retf"/></returns>
    public static bool IsRet(this InstructionType type)
    {
        return type is InstructionType.Ret or InstructionType.Retf;
    }
}

View File

@ -0,0 +1,16 @@
using X86Disassembler.X86;
using X86Disassembler.X86.Operands;
namespace X86Disassembler.Analysers;
/// <summary>
/// Helpers for reading a plain numeric value out of an <see cref="Operand"/>
/// </summary>
public static class OperandExtensions
{
    /// <summary>
    /// Extracts the numeric value carried by an operand
    /// </summary>
    /// <param name="operand">The operand to inspect</param>
    /// <returns>
    /// The target address when the operand is a <see cref="RelativeOffsetOperand"/>;
    /// 0 for every other operand
    /// </returns>
    public static uint GetValue(this Operand operand)
    {
        if (operand is RelativeOffsetOperand relative)
        {
            return relative.TargetAddress;
        }

        // Any other operand kind carries no value this helper knows how to read
        return 0;
    }
}

View File

@ -1,3 +1,4 @@
using X86Disassembler.Analysers;
using X86Disassembler.PE;
using X86Disassembler.X86;
@ -63,102 +64,37 @@ public class Program
var section = codeSections[0];
byte[] codeBytes = peFile.GetSectionData(peFile.SectionHeaders.IndexOf(section));
// First demonstrate sequential disassembly
Console.WriteLine($"Sequential disassembly of section {section.Name} at RVA 0x{section.VirtualAddress:X8}:");
// // First demonstrate sequential disassembly
// Console.WriteLine($"Sequential disassembly of section {section.Name} at RVA 0x{section.VirtualAddress:X8}:");
//
// // Create a disassembler for the code section
// // Base address should be the section's virtual address, not the image base + VA
// Disassembler disassembler = new Disassembler(codeBytes, section.VirtualAddress);
//
// // Disassemble sequentially (linear approach)
// var linearInstructions = disassembler.Disassemble();
//
// // Print the first 30 instructions from linear disassembly
// int linearCount = Math.Min(30, linearInstructions.Count);
// for (int i = 0; i < linearCount; i++)
// {
// Console.WriteLine(linearInstructions[i]);
// }
//
// // Print a summary of how many more instructions there are
// if (linearInstructions.Count > linearCount)
// {
// Console.WriteLine($"... ({linearInstructions.Count - linearCount} more instructions not shown)");
// }
// Create a disassembler for the code section
// Base address should be the section's virtual address, not the image base + VA
Disassembler disassembler = new Disassembler(codeBytes, section.VirtualAddress);
// disassemble entry point
var disassembler = new BlockDisassembler(codeBytes, section.VirtualAddress);
// Disassemble sequentially (linear approach)
var linearInstructions = disassembler.Disassemble();
// Print the first 30 instructions from linear disassembly
int linearCount = Math.Min(30, linearInstructions.Count);
for (int i = 0; i < linearCount; i++)
{
Console.WriteLine(linearInstructions[i]);
}
// Print a summary of how many more instructions there are
if (linearInstructions.Count > linearCount)
{
Console.WriteLine($"... ({linearInstructions.Count - linearCount} more instructions not shown)");
}
Console.WriteLine();
Console.WriteLine("====================================================");
Console.WriteLine();
// Now demonstrate control flow-based disassembly from entry point
Console.WriteLine($"Control flow-based disassembly starting from entry point 0x{peFile.OptionalHeader.AddressOfEntryPoint:X8}:");
try
{
// Get the entry point RVA from the PE header
uint entryPointRva = peFile.OptionalHeader.AddressOfEntryPoint;
// Make sure the entry point is within this code section
if (entryPointRva >= section.VirtualAddress &&
entryPointRva < section.VirtualAddress + section.VirtualSize)
{
// Disassemble starting from the entry point (control flow-based)
var cfgInstructions = disassembler.DisassembleFunction(entryPointRva);
// Print the instructions from the entry point function
int cfgCount = Math.Min(50, cfgInstructions.Count);
for (int i = 0; i < cfgCount; i++)
{
Console.WriteLine(cfgInstructions[i]);
}
// Print a summary if there are more instructions
if (cfgInstructions.Count > cfgCount)
{
Console.WriteLine($"... ({cfgInstructions.Count - cfgCount} more instructions in this function not shown)");
}
Console.WriteLine();
Console.WriteLine($"Found {cfgInstructions.Count} instructions following control flow from entry point.");
}
else
{
// Try one of the exported functions instead
Console.WriteLine($"Entry point is not in the {section.Name} section. Trying the first exported function instead...");
if (peFile.ExportDirectory != null && peFile.ExportedFunctions.Count > 0)
{
uint functionRva = peFile.ExportedFunctions[0].AddressRva;
Console.WriteLine($"Disassembling exported function at RVA 0x{functionRva:X8} ({peFile.ExportedFunctions[0].Name}):");
var cfgInstructions = disassembler.DisassembleFunction(functionRva);
// Print the instructions from the function
int cfgCount = Math.Min(50, cfgInstructions.Count);
for (int i = 0; i < cfgCount; i++)
{
Console.WriteLine(cfgInstructions[i]);
}
// Print a summary if there are more instructions
if (cfgInstructions.Count > cfgCount)
{
Console.WriteLine($"... ({cfgInstructions.Count - cfgCount} more instructions in this function not shown)");
}
Console.WriteLine();
Console.WriteLine($"Found {cfgInstructions.Count} instructions following control flow from exported function.");
}
else
{
Console.WriteLine("No exported functions found to disassemble.");
}
}
}
catch (Exception ex)
{
Console.WriteLine($"Error during control flow disassembly: {ex.Message}");
}
var asmFunction = disassembler.DisassembleFromAddress(peFile.OptionalHeader.AddressOfEntryPoint);
Console.WriteLine(asmFunction);
_ = 5;
}
// Console.WriteLine("\nPress Enter to exit...");

View File

@ -2,7 +2,6 @@ using X86Disassembler.X86.Operands;
namespace X86Disassembler.X86;
using System.Text;
using System.Collections.Generic;
/// <summary>
@ -19,9 +18,6 @@ public class Disassembler
// The base address of the code
private readonly ulong _baseAddress;
// Segment override prefixes
private static readonly byte[] SegmentOverridePrefixes = {0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65};
/// <summary>
/// Initializes a new instance of the Disassembler class
/// </summary>
@ -34,35 +30,6 @@ public class Disassembler
_baseAddress = baseAddress;
}
/// <summary>
/// Tells whether a byte is one of the values listed in SegmentOverridePrefixes
/// </summary>
/// <param name="b">The byte to look up</param>
/// <returns>True when the byte occurs in the prefix table; otherwise false</returns>
private bool IsSegmentOverridePrefix(byte b)
{
    foreach (byte candidate in SegmentOverridePrefixes)
    {
        if (candidate == b)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Maps a segment override prefix byte to the name of its segment register
/// </summary>
/// <param name="prefix">The prefix byte to translate</param>
/// <returns>The lower-case segment register name, or an empty string for any other byte</returns>
private string GetSegmentOverrideName(byte prefix)
{
    switch (prefix)
    {
        case 0x26:
            return "es";
        case 0x2E:
            return "cs";
        case 0x36:
            return "ss";
        case 0x3E:
            return "ds";
        case 0x64:
            return "fs";
        case 0x65:
            return "gs";
        default:
            return string.Empty;
    }
}
/// <summary>
/// Disassembles the code buffer sequentially and returns all disassembled instructions
/// </summary>
@ -117,196 +84,4 @@ public class Disassembler
return instructions;
}
/// <summary>
/// Disassembles a function starting from a specific virtual address (RVA) and follows control flow
/// </summary>
/// <param name="startRva">The relative virtual address to start disassembly from</param>
/// <returns>A list of disassembled instructions representing the function</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// The start address does not fall inside the code buffer
/// </exception>
public List<Instruction> DisassembleFunction(uint startRva)
{
    // Validate on the unsigned 64-bit values before narrowing to int. Casting the raw difference
    // first could wrap an address far below the base into something that looks like a valid offset.
    if (startRva < _baseAddress || startRva - _baseAddress >= (ulong)_length)
    {
        throw new ArgumentOutOfRangeException(nameof(startRva),
            $"Start address 0x{startRva:X8} is outside the bounds of the section at RVA 0x{_baseAddress:X8} with size {_length}");
    }

    // The offset within the buffer is the start RVA minus the base address; the check above
    // guarantees it is smaller than the buffer length and therefore fits into an int
    int startOffset = (int)(startRva - _baseAddress);

    return DisassembleFromOffset(startOffset);
}
/// <summary>
/// Disassembles instructions starting from a specific offset using control flow analysis
/// </summary>
/// <param name="startOffset">The offset in the code buffer to start disassembly from</param>
/// <returns>A list of disassembled instructions, sorted by address, each offset decoded at most once</returns>
private List<Instruction> DisassembleFromOffset(int startOffset)
{
    // Keep track of disassembled instructions
    List<Instruction> instructions = new List<Instruction>();

    // Track visited offsets to avoid infinite loops and duplicate instructions
    HashSet<int> visitedOffsets = new HashSet<int>();

    // Queue of offsets to process
    Queue<int> offsetQueue = new Queue<int>();
    offsetQueue.Enqueue(startOffset);

    while (offsetQueue.Count > 0)
    {
        int currentOffset = offsetQueue.Dequeue();

        // Skip if we've already processed this offset
        if (visitedOffsets.Contains(currentOffset))
        {
            continue;
        }

        // Create a new decoder positioned at the current offset
        InstructionDecoder decoder = new InstructionDecoder(_codeBuffer, _length);
        decoder.SetPosition(currentOffset);

        // Process instructions at this offset until we hit a control flow change
        while (decoder.CanReadByte() && decoder.GetPosition() < _length)
        {
            int positionBeforeDecode = decoder.GetPosition();

            // Stop as soon as this path runs into code another path has already decoded;
            // continuing would add the same instructions to the result a second time
            if (!visitedOffsets.Add(positionBeforeDecode))
            {
                break;
            }

            // Decode the instruction
            Instruction? instruction = decoder.DecodeInstruction();
            if (instruction == null)
            {
                // Invalid instruction, skip to next byte
                decoder.SetPosition(positionBeforeDecode + 1);
                continue;
            }

            // Set the instruction address
            instruction.Address = _baseAddress + (uint)positionBeforeDecode;

            // Add the instruction to our list
            instructions.Add(instruction);

            // Check for control flow instructions
            if (IsReturnInstruction(instruction))
            {
                // End of function, don't follow any further from this branch
                break;
            }
            else if (IsUnconditionalJump(instruction))
            {
                // Follow the unconditional jump target
                int? targetOffset = GetJumpTargetOffset(instruction, positionBeforeDecode);
                if (targetOffset.HasValue && targetOffset.Value >= 0 && targetOffset.Value < _length)
                {
                    offsetQueue.Enqueue(targetOffset.Value);
                }

                // End this branch of execution
                break;
            }
            else if (IsConditionalJump(instruction))
            {
                // Follow both paths for conditional jumps (target and fall-through)
                int? targetOffset = GetJumpTargetOffset(instruction, positionBeforeDecode);
                if (targetOffset.HasValue && targetOffset.Value >= 0 && targetOffset.Value < _length)
                {
                    offsetQueue.Enqueue(targetOffset.Value);
                }

                // Continue with fall-through path in this loop
            }
            else if (IsCallInstruction(instruction))
            {
                // For calls, we just continue with the next instruction (we don't follow the call)
                // We could add separate functionality to follow calls if needed
            }
        }
    }

    // Sort instructions by address for readability
    instructions.Sort((a, b) => a.Address.CompareTo(b.Address));

    return instructions;
}
/// <summary>
/// Tells whether an instruction is a return (Ret or Retf)
/// </summary>
private bool IsReturnInstruction(Instruction instruction)
    => instruction.Type is InstructionType.Ret or InstructionType.Retf;
/// <summary>
/// Tells whether an instruction is the unconditional jump (Jmp)
/// </summary>
private bool IsUnconditionalJump(Instruction instruction)
    => instruction.Type == InstructionType.Jmp;
/// <summary>
/// Tells whether an instruction is one of the conditional jump types
/// </summary>
private bool IsConditionalJump(Instruction instruction)
{
    switch (instruction.Type)
    {
        case InstructionType.Je:
        case InstructionType.Jne:
        case InstructionType.Ja:
        case InstructionType.Jae:
        case InstructionType.Jb:
        case InstructionType.Jbe:
        case InstructionType.Jg:
        case InstructionType.Jge:
        case InstructionType.Jl:
        case InstructionType.Jle:
        case InstructionType.Jo:
        case InstructionType.Jno:
        case InstructionType.Jp:
        case InstructionType.Jnp:
        case InstructionType.Js:
        case InstructionType.Jns:
        case InstructionType.Jcxz:
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Tells whether an instruction is a call (Call)
/// </summary>
private bool IsCallInstruction(Instruction instruction)
    => instruction.Type == InstructionType.Call;
/// <summary>
/// Computes the buffer offset a jump instruction points to
/// </summary>
/// <param name="instruction">The jump instruction to inspect</param>
/// <param name="instructionOffset">The buffer offset at which the instruction starts</param>
/// <returns>
/// The computed target offset when the first operand is an immediate;
/// null when there are no operands or the first operand is of any other kind
/// </returns>
private int? GetJumpTargetOffset(Instruction instruction, int instructionOffset)
{
    var operands = instruction.StructuredOperands;

    // Nothing to compute without at least one operand
    if (operands is null || operands.Count == 0)
    {
        return null;
    }

    // Only an immediate first operand is understood here; indirect jumps
    // like JMP [eax] or JMP [ebx+4] are not handled for now
    if (operands[0] is not ImmediateOperand immediate)
    {
        return null;
    }

    // Target = instruction offset + instruction length + jump offset.
    // NOTE(review): the visible caller sets instruction.Address to _baseAddress + instructionOffset
    // before calling, in which case this length evaluates to 1 whatever the real encoding length
    // is - confirm whether that is intended.
    int instructionLength = (int)(instruction.Address - _baseAddress) - instructionOffset + 1;
    int displacement = Convert.ToInt32(immediate.Value);

    return instructionOffset + instructionLength + displacement;
}
}

View File

@ -220,51 +220,6 @@ public class InstructionDecoder
return _prefixDecoder.HasOperandSizePrefix();
}
/// <summary>
/// Reports whether the address size prefix is present, as tracked by the prefix decoder
/// </summary>
/// <returns>The result of the prefix decoder's HasAddressSizePrefix check</returns>
public bool HasAddressSizePrefix() => _prefixDecoder.HasAddressSizePrefix();
/// <summary>
/// Reports whether a segment override prefix is present, as tracked by the prefix decoder
/// </summary>
/// <returns>The result of the prefix decoder's HasSegmentOverridePrefix check</returns>
public bool HasSegmentOverridePrefix() => _prefixDecoder.HasSegmentOverridePrefix();
/// <summary>
/// Returns the segment override reported by the prefix decoder
/// </summary>
/// <returns>The string produced by the prefix decoder's GetSegmentOverride</returns>
public string GetSegmentOverride() => _prefixDecoder.GetSegmentOverride();
/// <summary>
/// Reports whether the LOCK prefix is present, as tracked by the prefix decoder
/// </summary>
/// <returns>The result of the prefix decoder's HasLockPrefix check</returns>
public bool HasLockPrefix() => _prefixDecoder.HasLockPrefix();
/// <summary>
/// Reports whether the REP/REPNE prefix is present, as tracked by the prefix decoder
/// </summary>
/// <returns>The result of the prefix decoder's HasRepPrefix check</returns>
public bool HasRepPrefix() => _prefixDecoder.HasRepPrefix();
/// <summary>
/// Checks if the instruction has an operand size override prefix (0x66)
/// </summary>

View File

@ -48,8 +48,6 @@ public enum InstructionType
// Control flow
Jmp, // Jump unconditionally
Je, // Jump if equal
Jne, // Jump if not equal
Jg, // Jump if greater
Jge, // Jump if greater or equal
Jl, // Jump if less