diff --git a/X86Disassembler/Analysers/AnalyzerContext.cs b/X86Disassembler/Analysers/AnalyzerContext.cs
new file mode 100644
index 0000000..18f2b1f
--- /dev/null
+++ b/X86Disassembler/Analysers/AnalyzerContext.cs
@@ -0,0 +1,125 @@
+namespace X86Disassembler.Analysers;
+
+/// <summary>
+/// Central context for all analysis data related to a disassembled function
+/// </summary>
+public class AnalyzerContext
+{
+    /// <summary>
+    /// The function being analyzed
+    /// </summary>
+    public AsmFunction Function { get; }
+
+    /// <summary>
+    /// Dictionary mapping block addresses to instruction blocks
+    /// </summary>
+    public Dictionary<ulong, InstructionBlock> BlocksByAddress { get; } = [];
+
+    /// <summary>
+    /// Dictionary mapping loop header addresses to loops
+    /// </summary>
+    public Dictionary<ulong, Loop> LoopsByHeaderAddress { get; } = [];
+
+    /// <summary>
+    /// Dictionary mapping block addresses to the loops that contain them
+    /// </summary>
+    public Dictionary<ulong, List<Loop>> LoopsByBlockAddress { get; } = [];
+
+    /// <summary>
+    /// Dictionary for storing arbitrary analysis data by address (address -> key -> value)
+    /// </summary>
+    public Dictionary<ulong, Dictionary<string, object>> AnalysisDataByAddress { get; } = [];
+
+    /// <summary>
+    /// Creates a new analyzer context for the given function
+    /// </summary>
+    /// <param name="function">The function to analyze</param>
+    public AnalyzerContext(AsmFunction function)
+    {
+        Function = function;
+
+        // Index the blocks present right now; blocks added to the function afterwards are not picked up
+        foreach (var block in function.Blocks)
+        {
+            BlocksByAddress[block.Address] = block;
+        }
+    }
+
+    /// <summary>
+    /// Represents a loop in the control flow graph
+    /// </summary>
+    public class Loop
+    {
+        /// <summary>
+        /// The header block of the loop (the entry point into the loop)
+        /// </summary>
+        public InstructionBlock Header { get; set; } = null!;
+
+        /// <summary>
+        /// The blocks that are part of this loop
+        /// </summary>
+        public List<InstructionBlock> Blocks { get; set; } = [];
+
+        /// <summary>
+        /// The back edge that completes the loop (from a block back to the header)
+        /// </summary>
+        public (InstructionBlock From, InstructionBlock To) BackEdge { get; set; }
+
+        /// <summary>
+        /// The exit blocks of the loop (blocks that have successors outside the loop)
+        /// </summary>
+        public List<InstructionBlock> ExitBlocks { get; set; } = [];
+    }
+
+    /// <summary>
+    /// Stores analysis data for a specific address, replacing any value already stored under the same key
+    /// </summary>
+    /// <param name="address">The address to store data for</param>
+    /// <param name="key">The key for the data</param>
+    /// <param name="value">The data to store</param>
+    public void StoreAnalysisData(ulong address, string key, object value)
+    {
+        if (!AnalysisDataByAddress.TryGetValue(address, out var dataDict))
+        {
+            dataDict = [];
+            AnalysisDataByAddress[address] = dataDict;
+        }
+
+        dataDict[key] = value;
+    }
+
+    /// <summary>
+    /// Retrieves analysis data for a specific address
+    /// </summary>
+    /// <param name="address">The address to retrieve data for</param>
+    /// <param name="key">The key for the data</param>
+    /// <returns>The stored data, or null if not found</returns>
+    public object? GetAnalysisData(ulong address, string key)
+    {
+        if (AnalysisDataByAddress.TryGetValue(address, out var dataDict) &&
+            dataDict.TryGetValue(key, out var value))
+        {
+            return value;
+        }
+
+        return null;
+    }
+
+    /// <summary>
+    /// Retrieves typed analysis data for a specific address
+    /// </summary>
+    /// <typeparam name="T">The type of data to retrieve</typeparam>
+    /// <param name="address">The address to retrieve data for</param>
+    /// <param name="key">The key for the data</param>
+    /// <returns>The stored data, or default(T) if not found or wrong type</returns>
+    public T? GetAnalysisData<T>(ulong address, string key)
+    {
+        var data = GetAnalysisData(address, key);
+        if (data is T typedData)
+        {
+            return typedData;
+        }
+
+        return default;
+    }
+}
diff --git a/X86Disassembler/Analysers/AsmFunction.cs b/X86Disassembler/Analysers/AsmFunction.cs
index 8f89f14..12b3e6f 100644
--- a/X86Disassembler/Analysers/AsmFunction.cs
+++ b/X86Disassembler/Analysers/AsmFunction.cs
@@ -26,16 +26,62 @@ public class AsmFunction
public List ExitBlocks => Blocks.Where(b =>
b.Instructions.Count > 0 &&
b.Instructions[^1].Type.IsRet()).ToList();
+
+ /// <summary>
+ /// The analyzer context for this function; created in the constructor and handed to each analyzer by Analyze()
+ /// </summary>
+ public AnalyzerContext Context { get; private set; }
+
+ /// <summary>
+ /// Creates a new AsmFunction instance together with the analyzer context bound to it
+ /// </summary>
+ public AsmFunction()
+ {
+ Context = new AnalyzerContext(this); // NOTE(review): AnalyzerContext indexes Blocks as they are at this moment — confirm Blocks is already populated (or re-indexed later)
+ }
+
+    /// <summary>
+    /// Runs each analysis pass over this function, handing the shared context to every pass
+    /// </summary>
+    public void Analyze()
+    {
+        // Pass 1: loop detection
+        LoopAnalyzer loopPass = new();
+        loopPass.AnalyzeLoops(Context);
+
+        // Pass 2: register data flow
+        DataFlowAnalyzer dataFlowPass = new();
+        dataFlowPass.AnalyzeDataFlow(Context);
+    }
///
- /// Returns a string representation of the function, including its address and blocks
+ /// Returns a string representation of the function: its address, entry/exit blocks, block count, detected loops, and every block
///
public override string ToString()
{
+ string loopsInfo = ""; // loop section of the output: a count line plus one line per loop, or "Loops: None"
+ if (Context.LoopsByHeaderAddress.Count > 0)
+ {
+ loopsInfo = $"Loops: {Context.LoopsByHeaderAddress.Count}\n";
+ int i = 0; // running display index; loops are listed in dictionary enumeration order
+ foreach (var loop in Context.LoopsByHeaderAddress.Values)
+ {
+ loopsInfo += $" Loop {i++}: Header=0x{loop.Header.Address:X8}, " +
+ $"Blocks={loop.Blocks.Count}, " +
+ $"Back Edge=(0x{loop.BackEdge.From.Address:X8} -> 0x{loop.BackEdge.To.Address:X8}), " + // assumes BackEdge.From/To were assigned by the loop analyzer — TODO confirm
+ $"Exits={loop.ExitBlocks.Count}\n";
+ }
+ }
+ else
+ {
+ loopsInfo = "Loops: None\n";
+ }
+
return $"Function at 0x{Address:X8}\n" +
$"Entry Block: 0x{EntryBlock?.Address.ToString("X8") ?? "None"}\n" +
$"Exit Blocks: {(ExitBlocks.Count > 0 ? string.Join(", ", ExitBlocks.Select(b => $"0x{b.Address:X8}")) : "None")}\n" +
$"Total Blocks: {Blocks.Count}\n" +
+ loopsInfo +
$"{string.Join("\n", Blocks.Select(x => $"\t{x}"))}";
}
}
\ No newline at end of file
diff --git a/X86Disassembler/Analysers/BlockDisassembler.cs b/X86Disassembler/Analysers/BlockDisassembler.cs
index c377d7d..3efa843 100644
--- a/X86Disassembler/Analysers/BlockDisassembler.cs
+++ b/X86Disassembler/Analysers/BlockDisassembler.cs
@@ -1,4 +1,5 @@
using X86Disassembler.X86;
+using X86Disassembler.X86.Operands;
namespace X86Disassembler.Analysers;
@@ -138,11 +139,45 @@ public class BlockDisassembler
var newBlock = RegisterBlock(blocks, address, instructions, currentBlock, false, false);
blocksByAddress[address] = newBlock;
- // Queue the jump target address for processing
- addressQueue.Enqueue(jumpTargetAddress);
+ // Register the target block if it doesn't exist yet
+ InstructionBlock? jumpTargetBlock = null;
+ if (blocksByAddress.TryGetValue(jumpTargetAddress, out var existingTargetBlock))
+ {
+ jumpTargetBlock = existingTargetBlock;
+ }
+ else
+ {
+ // We'll create this block later when we process the queue
+ // For now, just queue it for processing
+ addressQueue.Enqueue(jumpTargetAddress);
+ }
- // Queue the fall-through address (next instruction after this jump)
- addressQueue.Enqueue(fallThroughAddress);
+ // Register the fall-through block if it doesn't exist yet
+ InstructionBlock? fallThroughBlock = null;
+ if (blocksByAddress.TryGetValue(fallThroughAddress, out var existingFallThroughBlock))
+ {
+ fallThroughBlock = existingFallThroughBlock;
+ }
+ else
+ {
+ // We'll create this block later when we process the queue
+ // For now, just queue it for processing
+ addressQueue.Enqueue(fallThroughAddress);
+ }
+
+ // If the jump target block exists, add it as a successor to the current block
+ if (jumpTargetBlock != null)
+ {
+ newBlock.Successors.Add(jumpTargetBlock);
+ jumpTargetBlock.Predecessors.Add(newBlock);
+ }
+
+ // If the fall-through block exists, add it as a successor to the current block
+ if (fallThroughBlock != null)
+ {
+ newBlock.Successors.Add(fallThroughBlock);
+ fallThroughBlock.Predecessors.Add(newBlock);
+ }
break;
}
@@ -158,8 +193,25 @@ public class BlockDisassembler
var newBlock = RegisterBlock(blocks, address, instructions, currentBlock, false, false);
blocksByAddress[address] = newBlock;
- // Queue the jump target address for processing
- addressQueue.Enqueue(jumpTargetAddress);
+ // Register the target block if it doesn't exist yet
+ InstructionBlock? jumpTargetBlock = null;
+ if (blocksByAddress.TryGetValue(jumpTargetAddress, out var existingTargetBlock))
+ {
+ jumpTargetBlock = existingTargetBlock;
+ }
+ else
+ {
+ // We'll create this block later when we process the queue
+ // For now, just queue it for processing
+ addressQueue.Enqueue(jumpTargetAddress);
+ }
+
+ // If the jump target block exists, add it as a successor to the current block
+ if (jumpTargetBlock != null)
+ {
+ newBlock.Successors.Add(jumpTargetBlock);
+ jumpTargetBlock.Predecessors.Add(newBlock);
+ }
break;
}
@@ -181,11 +233,201 @@ public class BlockDisassembler
// we need to sort the blocks ourselves
blocks.Sort((b1, b2) => b1.Address.CompareTo(b2.Address));
- // Convert all block addresses from file offsets to RVA
+ // First, establish the successor and predecessor relationships based on file offsets
+ // This is done by analyzing the last instruction of each block
foreach (var block in blocks)
{
- // Convert from file offset to RVA by adding the base address
- block.Address += _baseAddress;
+ if (block.Instructions.Count == 0) continue;
+
+ var lastInstruction = block.Instructions[^1];
+
+ // Check if the last instruction is a conditional jump
+ if (lastInstruction.Type.IsConditionalJump())
+ {
+ // Get the jump target address (file offset)
+ ulong targetAddress = 0;
+ if (lastInstruction.StructuredOperands.Count > 0 && lastInstruction.StructuredOperands[0] is RelativeOffsetOperand relOp)
+ {
+ targetAddress = relOp.TargetAddress;
+ }
+
+ // Find the target block
+ var targetBlock = blocks.FirstOrDefault(b => b.Address == targetAddress);
+ if (targetBlock != null)
+ {
+ // Add the target block as a successor to this block
+ if (!block.Successors.Contains(targetBlock))
+ {
+ block.Successors.Add(targetBlock);
+ }
+
+ // Add this block as a predecessor to the target block
+ if (!targetBlock.Predecessors.Contains(block))
+ {
+ targetBlock.Predecessors.Add(block);
+ }
+
+ // For conditional jumps, also add the fall-through block as a successor
+ // The fall-through block is the one that immediately follows this block in memory
+ // Find the next block in address order
+ var nextBlock = blocks.OrderBy(b => b.Address).FirstOrDefault(b => b.Address > block.Address);
+ if (nextBlock != null)
+ {
+ // The fall-through block is the one that immediately follows this block in memory
+ var fallThroughBlock = nextBlock;
+
+ // Add the fall-through block as a successor to this block
+ if (!block.Successors.Contains(fallThroughBlock))
+ {
+ block.Successors.Add(fallThroughBlock);
+ }
+
+ // Add this block as a predecessor to the fall-through block
+ if (!fallThroughBlock.Predecessors.Contains(block))
+ {
+ fallThroughBlock.Predecessors.Add(block);
+ }
+ }
+ }
+ }
+ // Check if the last instruction is an unconditional jump
+ else if (lastInstruction.Type == InstructionType.Jmp)
+ {
+ // Get the jump target address (file offset)
+ ulong targetAddress = 0;
+ if (lastInstruction.StructuredOperands.Count > 0 && lastInstruction.StructuredOperands[0] is RelativeOffsetOperand relOp)
+ {
+ targetAddress = relOp.TargetAddress;
+ }
+
+ // Find the target block
+ var targetBlock = blocks.FirstOrDefault(b => b.Address == targetAddress);
+ if (targetBlock != null)
+ {
+ // Add the target block as a successor to this block
+ if (!block.Successors.Contains(targetBlock))
+ {
+ block.Successors.Add(targetBlock);
+ }
+
+ // Add this block as a predecessor to the target block
+ if (!targetBlock.Predecessors.Contains(block))
+ {
+ targetBlock.Predecessors.Add(block);
+ }
+ }
+ }
+ // For non-jump instructions that don't end the function (like Ret), add the fall-through block
+ else if (!lastInstruction.Type.IsRet())
+ {
+ // The fall-through block is the one that immediately follows this block in memory
+ // Find the next block in address order
+ var nextBlock = blocks.OrderBy(b => b.Address).FirstOrDefault(b => b.Address > block.Address);
+ if (nextBlock != null)
+ {
+ // The fall-through block is the one that immediately follows this block in memory
+ var fallThroughBlock = nextBlock;
+
+ // Add the fall-through block as a successor to this block
+ if (!block.Successors.Contains(fallThroughBlock))
+ {
+ block.Successors.Add(fallThroughBlock);
+ }
+
+ // Add this block as a predecessor to the fall-through block
+ if (!fallThroughBlock.Predecessors.Contains(block))
+ {
+ fallThroughBlock.Predecessors.Add(block);
+ }
+ }
+ }
+ }
+
+ // Store the original file offset for each block in a dictionary
+ Dictionary blockToFileOffset = new Dictionary();
+ foreach (var block in blocks)
+ {
+ blockToFileOffset[block] = block.Address;
+ }
+
+ // Convert all block addresses from file offsets to RVA
+ // and update the block dictionary for quick lookup
+ Dictionary rvaBlocksByAddress = new Dictionary();
+ Dictionary fileOffsetToRvaMap = new Dictionary();
+
+ // First pass: create a mapping from file offset to RVA for each block
+ foreach (var block in blocks)
+ {
+ // Get the original file offset address
+ ulong blockFileOffset = block.Address;
+
+ // Calculate the RVA address
+ ulong blockRvaAddress = blockFileOffset + _baseAddress;
+
+ // Store the mapping
+ fileOffsetToRvaMap[blockFileOffset] = blockRvaAddress;
+ }
+
+ // Second pass: update all blocks to use RVA addresses
+ foreach (var block in blocks)
+ {
+ // Get the original file offset address
+ ulong blockFileOffset = block.Address;
+
+ // Update the block's address to RVA
+ ulong blockRvaAddress = fileOffsetToRvaMap[blockFileOffset];
+ block.Address = blockRvaAddress;
+
+ // Add to the dictionary for quick lookup
+ rvaBlocksByAddress[blockRvaAddress] = block;
+ }
+
+ // Now update all successors and predecessors to use the correct RVA addresses
+ foreach (var block in blocks)
+ {
+ // Create new lists for successors and predecessors with the correct RVA addresses
+ List updatedSuccessors = new List();
+ List updatedPredecessors = new List();
+
+ // Update successors
+ foreach (var successor in block.Successors)
+ {
+ // Get the original file offset of the successor
+ if (blockToFileOffset.TryGetValue(successor, out ulong successorFileOffset))
+ {
+ // Look up the RVA address in our mapping
+ if (fileOffsetToRvaMap.TryGetValue(successorFileOffset, out ulong successorRvaAddress))
+ {
+ // Find the block with this RVA address
+ if (rvaBlocksByAddress.TryGetValue(successorRvaAddress, out var rvaSuccessor))
+ {
+ updatedSuccessors.Add(rvaSuccessor);
+ }
+ }
+ }
+ }
+
+ // Update predecessors
+ foreach (var predecessor in block.Predecessors)
+ {
+ // Get the original file offset of the predecessor
+ if (blockToFileOffset.TryGetValue(predecessor, out ulong predecessorFileOffset))
+ {
+ // Look up the RVA address in our mapping
+ if (fileOffsetToRvaMap.TryGetValue(predecessorFileOffset, out ulong predecessorRvaAddress))
+ {
+ // Find the block with this RVA address
+ if (rvaBlocksByAddress.TryGetValue(predecessorRvaAddress, out var rvaPredecessor))
+ {
+ updatedPredecessors.Add(rvaPredecessor);
+ }
+ }
+ }
+ }
+
+ // Replace the old lists with the updated ones
+ block.Successors = updatedSuccessors;
+ block.Predecessors = updatedPredecessors;
}
// Create a new AsmFunction with the RVA address
@@ -246,7 +488,7 @@ public class BlockDisassembler
var block = new InstructionBlock()
{
Address = address,
- Instructions = instructions
+ Instructions = new List(instructions) // Create a copy of the instructions list
};
// Add the block to the collection
@@ -261,8 +503,6 @@ public class BlockDisassembler
// Add the current block as a predecessor to the new block
block.Predecessors.Add(currentBlock);
}
-
- // Block created successfully
return block;
}
diff --git a/X86Disassembler/Analysers/ControlFlowAnalyzer.cs b/X86Disassembler/Analysers/ControlFlowAnalyzer.cs
index 759d125..4b3b5c9 100644
--- a/X86Disassembler/Analysers/ControlFlowAnalyzer.cs
+++ b/X86Disassembler/Analysers/ControlFlowAnalyzer.cs
@@ -42,89 +42,61 @@ public class ControlFlowAnalyzer
/// The function to analyze
private void IdentifyIfElseStructures(Function function)
{
- // First pass: identify basic if-else structures
+ // Now analyze each block for conditional jumps
foreach (var block in function.AsmFunction.Blocks)
{
- // Skip blocks that don't end with a conditional jump
- if (block.Instructions.Count == 0)
- {
- continue;
- }
+ // Get the last instruction in the block
+ var lastInstruction = block.Instructions.LastOrDefault();
+ if (lastInstruction == null) continue;
- var lastInstruction = block.Instructions[^1];
-
- // Look for conditional jumps (Jcc instructions)
- if (IsConditionalJump(lastInstruction.Type))
+ // Check if the last instruction is a conditional jump
+ if (lastInstruction.Type.IsConditionalJump())
{
- // This is a potential if-then-else structure
- // The true branch is the target of the jump
- // The false branch is the fallthrough block
-
// Get the jump target address
ulong targetAddress = GetJumpTargetAddress(lastInstruction);
// Find the target block
- if (_context.BlocksByAddress.TryGetValue(targetAddress, out var targetBlock))
+ InstructionBlock? targetBlock = null;
+ foreach (var b in function.AsmFunction.Blocks)
{
- // Find the fallthrough block (the block that follows this one in memory)
- var fallthroughBlock = FindFallthroughBlock(block);
-
- if (fallthroughBlock != null)
+ if (b.Address == targetAddress)
{
- // Check if the fallthrough block ends with an unconditional jump
- // This could indicate an if-else structure where the 'else' branch jumps to a common merge point
- InstructionBlock? mergeBlock = null;
- bool hasElseBlock = true;
-
- if (fallthroughBlock.Instructions.Count > 0 &&
- fallthroughBlock.Instructions[^1].Type == InstructionType.Jmp)
- {
- // Get the jump target address
- ulong mergeAddress = GetJumpTargetAddress(fallthroughBlock.Instructions[^1]);
-
- // Find the merge block
- if (_context.BlocksByAddress.TryGetValue(mergeAddress, out var potentialMergeBlock))
- {
- mergeBlock = potentialMergeBlock;
- }
- }
-
- // Check if the 'then' block also jumps to the same merge point
- if (mergeBlock != null && targetBlock.Instructions.Count > 0 &&
- targetBlock.Instructions[^1].Type == InstructionType.Jmp)
- {
- ulong thenJumpAddress = GetJumpTargetAddress(targetBlock.Instructions[^1]);
-
- if (thenJumpAddress == mergeBlock.Address)
- {
- // We have a classic if-else structure with a merge point
- // Store the if-else structure in the context
- var ifElseStructure = new IfElseStructure
- {
- ConditionBlock = block,
- ThenBlock = targetBlock,
- ElseBlock = fallthroughBlock,
- MergeBlock = mergeBlock,
- IsComplete = true // Both branches merge back
- };
-
- _context.StoreAnalysisData(block.Address, "IfElseStructure", ifElseStructure);
- continue;
- }
- }
-
- // If we get here, we have a simple if-then or if-then-else without a clear merge point
- var simpleIfStructure = new IfElseStructure
- {
- ConditionBlock = block,
- ThenBlock = targetBlock,
- ElseBlock = hasElseBlock ? fallthroughBlock : null,
- IsComplete = false // No clear merge point
- };
-
- _context.StoreAnalysisData(block.Address, "IfElseStructure", simpleIfStructure);
+ targetBlock = b;
+ break;
}
}
+
+ if (targetBlock == null)
+ {
+ continue;
+ }
+
+ // Find the fall-through block (should be in the successors)
+ InstructionBlock? fallThroughBlock = null;
+ foreach (var successor in block.Successors)
+ {
+ if (successor != targetBlock)
+ {
+ fallThroughBlock = successor;
+ break;
+ }
+ }
+
+ if (fallThroughBlock == null)
+ {
+ continue;
+ }
+
+ // Create an if-else structure
+ var ifElseStructure = new IfElseStructure
+ {
+ ConditionBlock = block,
+ ThenBlock = targetBlock,
+ ElseBlock = fallThroughBlock
+ };
+
+ // Store the if-else structure in the analysis context
+ function.AsmFunction.Context.StoreAnalysisData(block.Address, "IfElseStructure", ifElseStructure);
}
}
@@ -195,32 +167,6 @@ public class ControlFlowAnalyzer
}
}
- ///
- /// Checks if the given instruction type is a conditional jump
- ///
- /// The instruction type
- /// True if the instruction is a conditional jump, false otherwise
- private bool IsConditionalJump(InstructionType type)
- {
- // Check for common conditional jumps
- return type == InstructionType.Jz ||
- type == InstructionType.Jnz ||
- type == InstructionType.Jg ||
- type == InstructionType.Jge ||
- type == InstructionType.Jl ||
- type == InstructionType.Jle ||
- type == InstructionType.Ja ||
- type == InstructionType.Jae ||
- type == InstructionType.Jb ||
- type == InstructionType.Jbe ||
- type == InstructionType.Jo ||
- type == InstructionType.Jno ||
- type == InstructionType.Js ||
- type == InstructionType.Jns ||
- type == InstructionType.Jp ||
- type == InstructionType.Jnp;
- }
-
///
/// Gets the target address of a jump instruction
///
@@ -228,39 +174,31 @@ public class ControlFlowAnalyzer
/// The target address of the jump
private ulong GetJumpTargetAddress(Instruction instruction)
{
- // The target address is usually the first operand of the jump instruction
- if (instruction.StructuredOperands.Count > 0 &&
- instruction.StructuredOperands[0] is ImmediateOperand immOp)
+ // Resolve the jump target from the instruction's first operand, if it has one
+
+ // Two operand kinds are understood: an immediate (returned as-is) and a relative offset (rebased below)
+ if (instruction.StructuredOperands.Count > 0)
{
- return (ulong)immOp.Value;
+ var operand = instruction.StructuredOperands[0];
+
+ if (operand is ImmediateOperand immOp)
+ {
+ return (ulong)immOp.Value;
+ }
+ else if (operand is RelativeOffsetOperand relOp)
+ {
+ // The operand already carries the resolved target (BlockDisassembler treats TargetAddress as a file offset)
+ // Rebase it by 0x1000 so it is comparable with block addresses after their file-offset-to-RVA conversion
+ // NOTE(review): BlockDisassembler adds _baseAddress, not a literal — confirm 0x1000 always equals that value
+ ulong rvaTargetAddress = relOp.TargetAddress + 0x1000;
+ return rvaTargetAddress;
+ }
}
// If we can't determine the target, return 0
return 0;
}
- ///
- /// Finds the fallthrough block for a given block
- ///
- /// The block to find the fallthrough for
- /// The fallthrough block, or null if none found
- private InstructionBlock? FindFallthroughBlock(InstructionBlock block)
- {
- // The fallthrough block is the one that follows this one in memory
- // It should be a successor of this block
- foreach (var successor in block.Successors)
- {
- // Check if this successor is the fallthrough block
- // (its address should be immediately after this block)
- if (successor.Address > block.Address)
- {
- return successor;
- }
- }
-
- return null;
- }
-
///
/// Checks if the given block is a potential switch statement header
///
diff --git a/X86Disassembler/Analysers/DataFlowAnalyzer.cs b/X86Disassembler/Analysers/DataFlowAnalyzer.cs
new file mode 100644
index 0000000..3f51c16
--- /dev/null
+++ b/X86Disassembler/Analysers/DataFlowAnalyzer.cs
@@ -0,0 +1,384 @@
+using X86Disassembler.X86;
+using X86Disassembler.X86.Operands;
+
+namespace X86Disassembler.Analysers;
+
+/// <summary>
+/// Analyzes data flow through instructions to track register values
+/// </summary>
+public class DataFlowAnalyzer
+{
+    // Constants for analysis data keys
+    private const string REGISTER_VALUE_KEY = "RegisterValue";
+    private const string MEMORY_VALUE_KEY = "MemoryValue";
+
+    /// <summary>
+    /// Represents a known value for a register or memory location
+    /// </summary>
+    public class ValueInfo
+    {
+        /// <summary>
+        /// The type of value (constant, register, memory, unknown)
+        /// </summary>
+        public enum ValueType
+        {
+            Unknown,
+            Constant,
+            Register,
+            Memory
+        }
+
+        /// <summary>
+        /// The type of this value
+        /// </summary>
+        public ValueType Type { get; set; } = ValueType.Unknown;
+
+        /// <summary>
+        /// The constant value (if Type is Constant)
+        /// </summary>
+        public ulong? ConstantValue { get; set; }
+
+        /// <summary>
+        /// The source register (if Type is Register)
+        /// </summary>
+        public RegisterIndex? SourceRegister { get; set; }
+
+        /// <summary>
+        /// The memory address or expression (if Type is Memory)
+        /// </summary>
+        public string? MemoryExpression { get; set; }
+
+        /// <summary>
+        /// The instruction that defined this value
+        /// </summary>
+        public Instruction? DefiningInstruction { get; set; }
+
+        /// <summary>
+        /// Returns a string representation of the value
+        /// </summary>
+        public override string ToString()
+        {
+            return Type switch
+            {
+                ValueType.Constant => $"0x{ConstantValue:X8}",
+                ValueType.Register => $"{SourceRegister}",
+                ValueType.Memory => $"[{MemoryExpression}]",
+                _ => "unknown"
+            };
+        }
+    }
+
+    /// <summary>
+    /// Analyzes data flow in the function and stores results in the analyzer context
+    /// </summary>
+    /// <param name="context">The analyzer context to store results in</param>
+    public void AnalyzeDataFlow(AnalyzerContext context)
+    {
+        // Process each block in order; tracking is block-local, nothing is carried across block boundaries
+        foreach (var block in context.Function.Blocks)
+        {
+            // Dictionary to track register values within this block (starts empty for every block)
+            Dictionary<RegisterIndex, ValueInfo> registerValues = new();
+
+            // Process each instruction in the block
+            foreach (var instruction in block.Instructions)
+            {
+                // Process the instruction based on its type
+                ProcessInstruction(instruction, registerValues, context);
+
+                // Store a snapshot of the register state as it is AFTER this instruction, keyed by its address
+                StoreRegisterState(instruction.Address, registerValues, context);
+            }
+        }
+    }
+
+    /// <summary>
+    /// Processes an instruction to update register values
+    /// </summary>
+    /// <param name="instruction">The instruction to process</param>
+    /// <param name="registerValues">The current register values</param>
+    /// <param name="context">The analyzer context</param>
+    private void ProcessInstruction(Instruction instruction, Dictionary<RegisterIndex, ValueInfo> registerValues, AnalyzerContext context)
+    {
+        // Handle different instruction types
+        switch (instruction.Type)
+        {
+            // MOV instructions
+            case InstructionType.Mov:
+                ProcessMovInstruction(instruction, registerValues);
+                break;
+
+            // XOR instructions
+            case InstructionType.Xor:
+                ProcessXorInstruction(instruction, registerValues);
+                break;
+
+            // ADD instructions
+            case InstructionType.Add:
+                ProcessAddInstruction(instruction, registerValues);
+                break;
+
+            // SUB instructions
+            case InstructionType.Sub:
+                ProcessSubInstruction(instruction, registerValues);
+                break;
+
+            // POP writes its destination register
+            case InstructionType.Pop:
+                ProcessPopInstruction(instruction, registerValues);
+                break;
+
+            // Call instructions typically clobber certain registers
+            case InstructionType.Call:
+                ProcessCallInstruction(instruction, registerValues);
+                break;
+
+            // Other instructions that modify registers
+            default:
+                // For now, conservatively mark a register first operand as unknown for unsupported instructions
+                if (instruction.StructuredOperands.Count > 0 &&
+                    instruction.StructuredOperands[0] is RegisterOperand regOp)
+                {
+                    registerValues[regOp.Register] = new ValueInfo
+                    {
+                        Type = ValueInfo.ValueType.Unknown,
+                        DefiningInstruction = instruction
+                    };
+                }
+
+                break;
+        }
+    }
+
+    /// <summary>
+    /// Processes a MOV instruction to update register values
+    /// </summary>
+    private void ProcessMovInstruction(Instruction instruction, Dictionary<RegisterIndex, ValueInfo> registerValues)
+    {
+        // Handle different MOV variants
+        if (instruction.StructuredOperands.Count >= 2)
+        {
+            var dest = instruction.StructuredOperands[0];
+            var src = instruction.StructuredOperands[1];
+
+            // MOV reg, imm
+            if (dest is RegisterOperand destReg && src is ImmediateOperand immSrc)
+            {
+                registerValues[destReg.Register] = new ValueInfo
+                {
+                    Type = ValueInfo.ValueType.Constant,
+                    ConstantValue = immSrc.Value,
+                    DefiningInstruction = instruction
+                };
+            }
+            // MOV reg, reg
+            else if (dest is RegisterOperand destReg2 && src is RegisterOperand srcReg)
+            {
+                if (registerValues.TryGetValue(srcReg.Register, out var srcValue))
+                {
+                    // Copy the source value
+                    registerValues[destReg2.Register] = new ValueInfo
+                    {
+                        Type = srcValue.Type,
+                        ConstantValue = srcValue.ConstantValue,
+                        SourceRegister = srcValue.SourceRegister,
+                        MemoryExpression = srcValue.MemoryExpression,
+                        DefiningInstruction = instruction
+                    };
+                }
+                else
+                {
+                    // Source register has no tracked value yet: record only which register it came from
+                    registerValues[destReg2.Register] = new ValueInfo
+                    {
+                        Type = ValueInfo.ValueType.Register,
+                        SourceRegister = srcReg.Register,
+                        DefiningInstruction = instruction
+                    };
+                }
+            }
+            // MOV reg, [mem]
+            else if (dest is RegisterOperand destReg3 && src is MemoryOperand memSrc)
+            {
+                registerValues[destReg3.Register] = new ValueInfo
+                {
+                    Type = ValueInfo.ValueType.Memory,
+                    MemoryExpression = memSrc.ToString(),
+                    DefiningInstruction = instruction
+                };
+            }
+            // MOV [mem], reg or MOV [mem], imm
+            // These don't update register values, so we don't need to handle them here
+        }
+    }
+
+    /// <summary>
+    /// Processes an XOR instruction to update register values
+    /// </summary>
+    private void ProcessXorInstruction(Instruction instruction, Dictionary<RegisterIndex, ValueInfo> registerValues)
+    {
+        // Handle XOR reg, reg (often used for zeroing a register)
+        if (instruction.StructuredOperands.Count >= 2)
+        {
+            var dest = instruction.StructuredOperands[0];
+            var src = instruction.StructuredOperands[1];
+
+            // XOR reg, same_reg (zeroing idiom)
+            if (dest is RegisterOperand destReg && src is RegisterOperand srcReg &&
+                destReg.Register == srcReg.Register)
+            {
+                registerValues[destReg.Register] = new ValueInfo
+                {
+                    Type = ValueInfo.ValueType.Constant,
+                    ConstantValue = 0,
+                    DefiningInstruction = instruction
+                };
+            }
+            // Other XOR operations make the result unknown
+            else if (dest is RegisterOperand destReg2)
+            {
+                registerValues[destReg2.Register] = new ValueInfo
+                {
+                    Type = ValueInfo.ValueType.Unknown,
+                    DefiningInstruction = instruction
+                };
+            }
+        }
+    }
+
+    /// <summary>
+    /// Processes an ADD instruction to update register values
+    /// </summary>
+    private void ProcessAddInstruction(Instruction instruction, Dictionary<RegisterIndex, ValueInfo> registerValues)
+    {
+        // Handle ADD reg, imm where we know the register value
+        if (instruction.StructuredOperands.Count >= 2)
+        {
+            var dest = instruction.StructuredOperands[0];
+            var src = instruction.StructuredOperands[1];
+
+            // ADD reg, imm where reg is a known constant
+            if (dest is RegisterOperand destReg && src is ImmediateOperand immSrc &&
+                registerValues.TryGetValue(destReg.Register, out var destValue) &&
+                destValue.Type == ValueInfo.ValueType.Constant &&
+                destValue.ConstantValue.HasValue)
+            {
+                // Calculate the new constant value; the uint cast truncates the sum to 32 bits
+                registerValues[destReg.Register] = new ValueInfo
+                {
+                    Type = ValueInfo.ValueType.Constant,
+                    ConstantValue = (uint?) (destValue.ConstantValue.Value + immSrc.Value),
+                    DefiningInstruction = instruction
+                };
+            }
+            // Other ADD operations make the result unknown
+            else if (dest is RegisterOperand destReg2)
+            {
+                registerValues[destReg2.Register] = new ValueInfo
+                {
+                    Type = ValueInfo.ValueType.Unknown,
+                    DefiningInstruction = instruction
+                };
+            }
+        }
+    }
+
+    /// <summary>
+    /// Processes a SUB instruction to update register values
+    /// </summary>
+    private void ProcessSubInstruction(Instruction instruction, Dictionary<RegisterIndex, ValueInfo> registerValues)
+    {
+        // Handle SUB reg, imm where we know the register value
+        if (instruction.StructuredOperands.Count >= 2)
+        {
+            var dest = instruction.StructuredOperands[0];
+            var src = instruction.StructuredOperands[1];
+
+            // SUB reg, imm where reg is a known constant
+            if (dest is RegisterOperand destReg && src is ImmediateOperand immSrc &&
+                registerValues.TryGetValue(destReg.Register, out var destValue) &&
+                destValue.Type == ValueInfo.ValueType.Constant &&
+                destValue.ConstantValue.HasValue)
+            {
+                // Calculate the new constant value; the uint cast truncates the difference to 32 bits
+                registerValues[destReg.Register] = new ValueInfo
+                {
+                    Type = ValueInfo.ValueType.Constant,
+                    ConstantValue = (uint?) (destValue.ConstantValue.Value - immSrc.Value),
+                    DefiningInstruction = instruction
+                };
+            }
+            // Other SUB operations make the result unknown
+            else if (dest is RegisterOperand destReg2)
+            {
+                registerValues[destReg2.Register] = new ValueInfo
+                {
+                    Type = ValueInfo.ValueType.Unknown,
+                    DefiningInstruction = instruction
+                };
+            }
+        }
+    }
+
+    /// <summary>
+    /// Processes a POP instruction to update register values
+    /// </summary>
+    private void ProcessPopInstruction(Instruction instruction, Dictionary<RegisterIndex, ValueInfo> registerValues)
+    {
+        // POP reg makes the register value unknown (comes from stack)
+        if (instruction.StructuredOperands.Count >= 1 &&
+            instruction.StructuredOperands[0] is RegisterOperand destReg)
+        {
+            registerValues[destReg.Register] = new ValueInfo
+            {
+                Type = ValueInfo.ValueType.Unknown,
+                DefiningInstruction = instruction
+            };
+        }
+    }
+
+    /// <summary>
+    /// Processes a CALL instruction to update register values
+    /// </summary>
+    private void ProcessCallInstruction(Instruction instruction, Dictionary<RegisterIndex, ValueInfo> registerValues)
+    {
+        // CALL instructions typically clobber EAX, ECX, and EDX in x86 calling conventions
+        registerValues[RegisterIndex.A] = new ValueInfo
+        {
+            Type = ValueInfo.ValueType.Unknown,
+            DefiningInstruction = instruction
+        };
+
+        registerValues[RegisterIndex.C] = new ValueInfo
+        {
+            Type = ValueInfo.ValueType.Unknown,
+            DefiningInstruction = instruction
+        };
+
+        registerValues[RegisterIndex.D] = new ValueInfo
+        {
+            Type = ValueInfo.ValueType.Unknown,
+            DefiningInstruction = instruction
+        };
+    }
+
+    /// <summary>
+    /// Stores the current register state at the given address
+    /// </summary>
+    private void StoreRegisterState(ulong address, Dictionary<RegisterIndex, ValueInfo> registerValues, AnalyzerContext context)
+    {
+        // Shallow copy: later instructions replace dictionary entries, so the stored snapshot must not alias the live map
+        var registerValuesCopy = new Dictionary<RegisterIndex, ValueInfo>(registerValues);
+
+        // Store in the context
+        context.StoreAnalysisData(address, REGISTER_VALUE_KEY, registerValuesCopy);
+    }
+
+    /// <summary>
+    /// Gets the register values at the given address
+    /// </summary>
+    public static Dictionary<RegisterIndex, ValueInfo>? GetRegisterValues(ulong address, AnalyzerContext context)
+    {
+        return context.GetAnalysisData<Dictionary<RegisterIndex, ValueInfo>>(address, REGISTER_VALUE_KEY);
+    }
+}
\ No newline at end of file
diff --git a/X86Disassembler/Analysers/DecompilerTypes/CallingConvention.cs b/X86Disassembler/Analysers/DecompilerTypes/CallingConvention.cs
new file mode 100644
index 0000000..eed8bd8
--- /dev/null
+++ b/X86Disassembler/Analysers/DecompilerTypes/CallingConvention.cs
@@ -0,0 +1,58 @@
+namespace X86Disassembler.Analysers.DecompilerTypes;
+
+/// <summary>
+/// Represents a calling convention used by a function
+/// </summary>
+public enum CallingConvention
+{
+ /// <summary>
+ /// C declaration calling convention (caller cleans the stack)
+ /// Parameters are pushed right-to-left
+ /// EAX, ECX, EDX are caller-saved
+ /// EBX, ESI, EDI, EBP are callee-saved
+ /// Return value in EAX (or EDX:EAX for 64-bit values)
+ /// </summary>
+ Cdecl,
+
+ /// <summary>
+ /// Standard calling convention (callee cleans the stack)
+ /// Parameters are pushed right-to-left
+ /// EAX, ECX, EDX are caller-saved
+ /// EBX, ESI, EDI, EBP are callee-saved
+ /// Return value in EAX (or EDX:EAX for 64-bit values)
+ /// </summary>
+ Stdcall,
+
+ /// <summary>
+ /// Fast calling convention
+ /// First two parameters in ECX and EDX, rest on stack right-to-left
+ /// EAX, ECX, EDX are caller-saved
+ /// EBX, ESI, EDI, EBP are callee-saved
+ /// Return value in EAX
+ /// Callee cleans the stack
+ /// </summary>
+ Fastcall,
+
+ /// <summary>
+ /// This calling convention (C++ member functions)
+ /// 'this' pointer in ECX, other parameters pushed right-to-left
+ /// EAX, ECX, EDX are caller-saved
+ /// EBX, ESI, EDI, EBP are callee-saved
+ /// Return value in EAX
+ /// Callee cleans the stack (MSVC; variadic member functions fall back to cdecl with caller cleanup)
+ /// </summary>
+ Thiscall,
+
+ /// <summary>
+ /// Microsoft vectorcall convention
+ /// On 32-bit x86: first two integer parameters in ECX and EDX, up to six floating-point/vector parameters in XMM0-XMM5 (R8/R9 are used only on x64)
+ /// Additional parameters pushed right-to-left; callee cleans the stack
+ /// Return value in EAX or XMM0
+ /// </summary>
+ Vectorcall,
+
+ /// <summary>
+ /// Unknown calling convention
+ /// </summary>
+ Unknown
+}
diff --git a/X86Disassembler/Analysers/DecompilerTypes/DataType.cs b/X86Disassembler/Analysers/DecompilerTypes/DataType.cs
new file mode 100644
index 0000000..85b0d90
--- /dev/null
+++ b/X86Disassembler/Analysers/DecompilerTypes/DataType.cs
@@ -0,0 +1,190 @@
+namespace X86Disassembler.Analysers.DecompilerTypes;
+
+/// <summary>
+/// Represents a data type in decompiled code
+/// </summary>
+public class DataType
+{
+    /// <summary>
+    /// The category of the data type
+    /// </summary>
+    public enum TypeCategory
+    {
+        /// <summary>Unknown type</summary>
+        Unknown,
+
+        /// <summary>Void type (no value)</summary>
+        Void,
+
+        /// <summary>Integer type</summary>
+        Integer,
+
+        /// <summary>Floating point type</summary>
+        Float,
+
+        /// <summary>Pointer type</summary>
+        Pointer,
+
+        /// <summary>Structure type</summary>
+        Struct,
+
+        /// <summary>Array type</summary>
+        Array,
+
+        /// <summary>Function type</summary>
+        Function
+    }
+
+    /// <summary>
+    /// The name of the type
+    /// </summary>
+    public string Name { get; set; } = string.Empty;
+
+    /// <summary>
+    /// The category of the type
+    /// </summary>
+    public TypeCategory Category { get; set; }
+
+    /// <summary>
+    /// The size of the type in bytes
+    /// </summary>
+    public int Size { get; set; }
+
+    /// <summary>
+    /// Whether the type is signed (for integer types)
+    /// </summary>
+    public bool IsSigned { get; set; }
+
+    /// <summary>
+    /// The pointed-to type (for pointer types)
+    /// </summary>
+    public DataType? PointedType { get; set; }
+
+    /// <summary>
+    /// The element type (for array types)
+    /// </summary>
+    public DataType? ElementType { get; set; }
+
+    /// <summary>
+    /// The number of elements (for array types)
+    /// </summary>
+    public int? ElementCount { get; set; }
+
+    /// <summary>
+    /// The fields of the structure (for struct types)
+    /// </summary>
+    public List<StructField> Fields { get; set; } = [];
+
+    /// <summary>
+    /// Creates a new data type with the specified name and category
+    /// </summary>
+    /// <param name="name">The name of the type</param>
+    /// <param name="category">The category of the type</param>
+    /// <param name="size">The size of the type in bytes</param>
+    public DataType(string name, TypeCategory category, int size)
+    {
+        Name = name;
+        Category = category;
+        Size = size;
+    }
+
+    /// <summary>
+    /// Returns a string representation of the type (its name)
+    /// </summary>
+    public override string ToString() => Name;
+
+    /// <summary>
+    /// Creates a pointer type to the specified type
+    /// </summary>
+    /// <param name="pointedType">The type being pointed to</param>
+    /// <param name="pointerSize">The size of the pointer in bytes; defaults to 4 (32-bit x86)</param>
+    /// <returns>A new pointer type named "<c>{pointedType.Name}*</c>"</returns>
+    /// <exception cref="ArgumentNullException"><paramref name="pointedType"/> is null</exception>
+    /// <exception cref="ArgumentOutOfRangeException"><paramref name="pointerSize"/> is not positive</exception>
+    public static DataType CreatePointerType(DataType pointedType, int pointerSize = 4)
+    {
+        ArgumentNullException.ThrowIfNull(pointedType);
+        if (pointerSize <= 0)
+        {
+            throw new ArgumentOutOfRangeException(nameof(pointerSize), pointerSize, "Pointer size must be positive.");
+        }
+
+        return new DataType($"{pointedType.Name}*", TypeCategory.Pointer, pointerSize)
+        {
+            PointedType = pointedType
+        };
+    }
+
+    /// <summary>
+    /// Creates an array type of the specified element type and count
+    /// </summary>
+    /// <param name="elementType">The type of the array elements</param>
+    /// <param name="count">The number of elements in the array (must not be negative)</param>
+    /// <returns>A new array type whose size is the element size multiplied by <paramref name="count"/></returns>
+    /// <exception cref="ArgumentNullException"><paramref name="elementType"/> is null</exception>
+    /// <exception cref="ArgumentOutOfRangeException"><paramref name="count"/> is negative</exception>
+    /// <exception cref="OverflowException">The total size does not fit in an <see cref="int"/></exception>
+    public static DataType CreateArrayType(DataType elementType, int count)
+    {
+        ArgumentNullException.ThrowIfNull(elementType);
+        if (count < 0)
+        {
+            throw new ArgumentOutOfRangeException(nameof(count), count, "Array element count cannot be negative.");
+        }
+
+        // checked: an oversized array surfaces as an OverflowException instead of
+        // silently wrapping around to a bogus (possibly negative) size
+        int totalSize = checked(elementType.Size * count);
+
+        return new DataType($"{elementType.Name}[{count}]", TypeCategory.Array, totalSize)
+        {
+            ElementType = elementType,
+            ElementCount = count
+        };
+    }
+
+    // Common predefined types.
+    // NOTE: these are shared instances and their properties have public setters,
+    // so mutating one affects every user of it. Treat them as read-only.
+
+    /// <summary>Placeholder for a type that has not been determined (size 0)</summary>
+    public static readonly DataType Unknown = new DataType("unknown", TypeCategory.Unknown, 0);
+
+    /// <summary>The void type (size 0)</summary>
+    public static readonly DataType Void = new DataType("void", TypeCategory.Void, 0);
+
+    /// <summary>Signed 1-byte integer</summary>
+    public static readonly DataType Char = new DataType("char", TypeCategory.Integer, 1) { IsSigned = true };
+
+    /// <summary>Unsigned 1-byte integer</summary>
+    public static readonly DataType UChar = new DataType("unsigned char", TypeCategory.Integer, 1);
+
+    /// <summary>Signed 2-byte integer</summary>
+    public static readonly DataType Short = new DataType("short", TypeCategory.Integer, 2) { IsSigned = true };
+
+    /// <summary>Unsigned 2-byte integer</summary>
+    public static readonly DataType UShort = new DataType("unsigned short", TypeCategory.Integer, 2);
+
+    /// <summary>Signed 4-byte integer</summary>
+    public static readonly DataType Int = new DataType("int", TypeCategory.Integer, 4) { IsSigned = true };
+
+    /// <summary>Unsigned 4-byte integer</summary>
+    public static readonly DataType UInt = new DataType("unsigned int", TypeCategory.Integer, 4);
+
+    /// <summary>4-byte floating point value</summary>
+    public static readonly DataType Float = new DataType("float", TypeCategory.Float, 4);
+
+    /// <summary>8-byte floating point value</summary>
+    public static readonly DataType Double = new DataType("double", TypeCategory.Float, 8);
+}
+
+/// <summary>
+/// Represents a field in a structure
+/// </summary>
+public class StructField
+{
+    /// <summary>
+    /// Creates a new structure field
+    /// </summary>
+    /// <param name="name">The name of the field</param>
+    /// <param name="type">The type of the field</param>
+    /// <param name="offset">The offset of the field within the structure</param>
+    public StructField(string name, DataType type, int offset) =>
+        (Name, Type, Offset) = (name, type, offset);
+
+    /// <summary>
+    /// The name of the field
+    /// </summary>
+    public string Name { get; set; } = string.Empty;
+
+    /// <summary>
+    /// The type of the field
+    /// </summary>
+    public DataType Type { get; set; } = DataType.Unknown;
+
+    /// <summary>
+    /// The offset of the field within the structure
+    /// </summary>
+    public int Offset { get; set; }
+}
diff --git a/X86Disassembler/Analysers/DecompilerTypes/Function.cs b/X86Disassembler/Analysers/DecompilerTypes/Function.cs
new file mode 100644
index 0000000..29af312
--- /dev/null
+++ b/X86Disassembler/Analysers/DecompilerTypes/Function.cs
@@ -0,0 +1,98 @@
+using X86Disassembler.X86;
+using X86Disassembler.X86.Operands;
+
+namespace X86Disassembler.Analysers.DecompilerTypes;
+
+/// <summary>
+/// Represents a function in decompiled code
+/// </summary>
+public class Function
+{
+    /// <summary>
+    /// Creates a new function with the specified name and address
+    /// </summary>
+    /// <param name="name">The name of the function</param>
+    /// <param name="address">The address of the function</param>
+    /// <param name="asmFunction">The assembly function representation</param>
+    public Function(string name, ulong address, AsmFunction asmFunction)
+    {
+        Name = name;
+        Address = address;
+        AsmFunction = asmFunction;
+    }
+
+    /// <summary>
+    /// The name of the function
+    /// </summary>
+    public string Name { get; set; } = string.Empty;
+
+    /// <summary>
+    /// The address of the function
+    /// </summary>
+    public ulong Address { get; set; }
+
+    /// <summary>
+    /// The return type of the function (defaults to void)
+    /// </summary>
+    public DataType ReturnType { get; set; } = DataType.Void;
+
+    /// <summary>
+    /// The parameters of the function
+    /// </summary>
+    public List<Variable> Parameters { get; set; } = [];
+
+    /// <summary>
+    /// Local variables in this function
+    /// </summary>
+    public List<Variable> LocalVariables { get; } = [];
+
+    /// <summary>
+    /// Variables stored in registers
+    /// </summary>
+    public List<Variable> RegisterVariables { get; } = [];
+
+    /// <summary>
+    /// The calling convention used by the function (defaults to cdecl)
+    /// </summary>
+    public CallingConvention CallingConvention { get; set; } = CallingConvention.Cdecl;
+
+    /// <summary>
+    /// The assembly function representation
+    /// </summary>
+    public AsmFunction AsmFunction { get; set; }
+
+    /// <summary>
+    /// Analyzes the function to identify variables
+    /// </summary>
+    public void AnalyzeVariables() =>
+        // Stack variables are the only kind analyzed here; the analyzer works on this function's context
+        new VariableAnalyzer(AsmFunction.Context).AnalyzeStackVariables(this);
+
+    /// <summary>
+    /// Returns a string representation of the function signature
+    /// </summary>
+    /// <returns>The signature in the form "<c>ReturnType Name(Type1 name1, Type2 name2)</c>"</returns>
+    public string GetSignature()
+    {
+        var renderedParameters = string.Join(
+            ", ",
+            from parameter in Parameters
+            select $"{parameter.Type} {parameter.Name}");
+
+        return $"{ReturnType} {Name}({renderedParameters})";
+    }
+
+    /// <summary>
+    /// Returns a string representation of the function (its signature)
+    /// </summary>
+    public override string ToString() => GetSignature();
+}
diff --git a/X86Disassembler/Analysers/DecompilerTypes/Variable.cs b/X86Disassembler/Analysers/DecompilerTypes/Variable.cs
new file mode 100644
index 0000000..c428824
--- /dev/null
+++ b/X86Disassembler/Analysers/DecompilerTypes/Variable.cs
@@ -0,0 +1,102 @@
+namespace X86Disassembler.Analysers.DecompilerTypes;
+
+/// <summary>
+/// Represents a variable in decompiled code
+/// </summary>
+public class Variable
+{
+    /// <summary>
+    /// The type of storage for a variable
+    /// </summary>
+    public enum StorageType
+    {
+        /// <summary>Variable stored on the stack (local variable)</summary>
+        Stack,
+
+        /// <summary>Variable stored in a register</summary>
+        Register,
+
+        /// <summary>Variable stored in global memory</summary>
+        Global,
+
+        /// <summary>Function parameter passed on the stack</summary>
+        Parameter,
+
+        /// <summary>Function parameter passed in a register</summary>
+        RegisterParameter
+    }
+
+    /// <summary>
+    /// Creates a new variable with the specified name and type
+    /// </summary>
+    /// <param name="name">The name of the variable</param>
+    /// <param name="type">The type of the variable</param>
+    public Variable(string name, DataType type) => (Name, Type) = (name, type);
+
+    /// <summary>
+    /// The name of the variable
+    /// </summary>
+    public string Name { get; set; } = string.Empty;
+
+    /// <summary>
+    /// The type of the variable
+    /// </summary>
+    public DataType Type { get; set; } = DataType.Unknown;
+
+    /// <summary>
+    /// The storage location of the variable
+    /// </summary>
+    public StorageType Storage { get; set; }
+
+    /// <summary>
+    /// The offset from the base pointer (for stack variables)
+    /// </summary>
+    public int? StackOffset { get; set; }
+
+    /// <summary>
+    /// The register that holds this variable (for register variables)
+    /// </summary>
+    public X86.RegisterIndex? Register { get; set; }
+
+    /// <summary>
+    /// The memory address (for global variables)
+    /// </summary>
+    public ulong? Address { get; set; }
+
+    /// <summary>
+    /// The size of the variable in bytes
+    /// </summary>
+    public int Size { get; set; }
+
+    /// <summary>
+    /// Whether this variable is a function parameter
+    /// </summary>
+    public bool IsParameter { get; set; }
+
+    /// <summary>
+    /// The parameter index (if this is a parameter)
+    /// </summary>
+    public int? ParameterIndex { get; set; }
+
+    /// <summary>
+    /// Returns a string representation of the variable in the form "<c>Type Name</c>"
+    /// </summary>
+    public override string ToString() => $"{Type} {Name}";
+}
diff --git a/X86Disassembler/Analysers/LoopAnalyzer.cs b/X86Disassembler/Analysers/LoopAnalyzer.cs
new file mode 100644
index 0000000..f68624e
--- /dev/null
+++ b/X86Disassembler/Analysers/LoopAnalyzer.cs
@@ -0,0 +1,120 @@
+namespace X86Disassembler.Analysers;
+
+/// <summary>
+/// Analyzes the control flow graph to identify loops
+/// </summary>
+public class LoopAnalyzer
+{
+    /// <summary>
+    /// Identifies loops in the given function and stores them in the analyzer context
+    /// </summary>
+    /// <remarks>
+    /// A true back edge is an edge from a node to one of its dominators. This analyzer uses a
+    /// simplified heuristic instead: any edge whose target address is not higher than its source
+    /// address is treated as a back edge, which includes a block that jumps to itself.
+    /// When several back edges share a header, <c>LoopsByHeaderAddress</c> keeps only the loop
+    /// found last, while <c>LoopsByBlockAddress</c> lists every loop that was found.
+    /// </remarks>
+    /// <param name="context">The analyzer context to store results in</param>
+    public void AnalyzeLoops(AnalyzerContext context)
+    {
+        foreach (var block in context.Function.Blocks)
+        {
+            foreach (var successor in block.Successors)
+            {
+                // Forward edges cannot close a loop under the address heuristic.
+                // Equal addresses are NOT skipped: that is a single-block loop.
+                if (successor.Address > block.Address)
+                {
+                    continue;
+                }
+
+                // Create a new loop with the identified back edge
+                var loop = new AnalyzerContext.Loop
+                {
+                    Header = successor,
+                    BackEdge = (block, successor)
+                };
+
+                FindLoopBlocks(loop);
+                FindLoopExits(loop);
+
+                // Store the loop in the context (replaces any earlier loop with the same header)
+                context.LoopsByHeaderAddress[successor.Address] = loop;
+
+                // Update the blocks-to-loops mapping
+                foreach (var loopBlock in loop.Blocks)
+                {
+                    if (!context.LoopsByBlockAddress.TryGetValue(loopBlock.Address, out var loops))
+                    {
+                        loops = [];
+                        context.LoopsByBlockAddress[loopBlock.Address] = loops;
+                    }
+
+                    loops.Add(loop);
+                }
+            }
+        }
+    }
+
+    /// <summary>
+    /// Finds all blocks that are part of the loop
+    /// </summary>
+    /// <remarks>
+    /// Walks predecessors breadth-first from the back edge source and stops at the header.
+    /// No dominance check is made, so if the back edge source can be reached without passing
+    /// through the header, blocks outside the real loop body are collected as well.
+    /// </remarks>
+    /// <param name="loop">The loop to analyze; its <c>Blocks</c> list is filled in</param>
+    private void FindLoopBlocks(AnalyzerContext.Loop loop)
+    {
+        // The header is always part of the loop
+        loop.Blocks.Add(loop.Header);
+
+        // Pre-marking the header as visited is what stops the backward walk at the loop entry:
+        // the header is never expanded, so its predecessors outside the loop are never followed
+        var visited = new HashSet<ulong> { loop.Header.Address };
+
+        var pending = new Queue<InstructionBlock>();
+        pending.Enqueue(loop.BackEdge.From);
+
+        while (pending.Count > 0)
+        {
+            var block = pending.Dequeue();
+
+            // Already handled (or it is the header): nothing to do
+            if (!visited.Add(block.Address))
+            {
+                continue;
+            }
+
+            loop.Blocks.Add(block);
+
+            foreach (var predecessor in block.Predecessors)
+            {
+                // Skip blocks that would be rejected on dequeue anyway
+                if (!visited.Contains(predecessor.Address))
+                {
+                    pending.Enqueue(predecessor);
+                }
+            }
+        }
+    }
+
+    /// <summary>
+    /// Finds all exit blocks of the loop (blocks that have successors outside the loop)
+    /// </summary>
+    /// <param name="loop">The loop to analyze; its <c>ExitBlocks</c> list is filled in</param>
+    private void FindLoopExits(AnalyzerContext.Loop loop)
+    {
+        // Set-based membership keeps this linear in the number of edges
+        var members = new HashSet<InstructionBlock>(loop.Blocks);
+
+        foreach (var block in loop.Blocks)
+        {
+            foreach (var successor in block.Successors)
+            {
+                // If the successor is not part of the loop, this block is an exit
+                if (!members.Contains(successor))
+                {
+                    loop.ExitBlocks.Add(block);
+                    break; // One outside successor is enough; avoid adding the block twice
+                }
+            }
+        }
+    }
+}
diff --git a/X86Disassembler/Analysers/PseudocodeGenerator.cs b/X86Disassembler/Analysers/PseudocodeGenerator.cs
index 18a6af5..9e9b606 100644
--- a/X86Disassembler/Analysers/PseudocodeGenerator.cs
+++ b/X86Disassembler/Analysers/PseudocodeGenerator.cs
@@ -157,7 +157,39 @@ public class PseudocodeGenerator
// Check if this block ends with a conditional jump
bool hasConditionalJump = block.Instructions.Count > 0 &&
- IsConditionalJump(block.Instructions[^1].Type);
+ IsConditionalJump(block.Instructions[^1].Type);
+
+ // Add debug info about conditional jumps
+ if (hasConditionalJump)
+ {
+ var jumpInstruction = block.Instructions[^1];
+ result.AppendLine($"{new string(' ', indentLevel * 4)}// DEBUG: Conditional jump {jumpInstruction} detected");
+
+ // Get the jump target address
+ ulong targetAddress = GetJumpTargetAddress(jumpInstruction);
+ result.AppendLine($"{new string(' ', indentLevel * 4)}// DEBUG: Jump target: 0x{targetAddress:X8}");
+
+ // Check if we can find a comparison instruction before the jump
+ Instruction? comparisonInstruction = null;
+ for (int i = block.Instructions.Count - 2; i >= 0 && i >= block.Instructions.Count - 5; i--)
+ {
+ var instruction = block.Instructions[i];
+ if (instruction.Type == InstructionType.Cmp || instruction.Type == InstructionType.Test)
+ {
+ comparisonInstruction = instruction;
+ break;
+ }
+ }
+
+ if (comparisonInstruction != null)
+ {
+ result.AppendLine($"{new string(' ', indentLevel * 4)}// DEBUG: Found comparison: {comparisonInstruction}");
+ }
+ else
+ {
+ result.AppendLine($"{new string(' ', indentLevel * 4)}// DEBUG: No comparison instruction found");
+ }
+ }
// If this block has a conditional jump but wasn't detected as an if-else structure,
// we'll create an inline if statement for better readability
diff --git a/X86Disassembler/Program.cs b/X86Disassembler/Program.cs
index 288e8bb..2fa363d 100644
--- a/X86Disassembler/Program.cs
+++ b/X86Disassembler/Program.cs
@@ -94,12 +94,25 @@ public class Program
try
{
+ // Collect the exported functions so they can be listed for the user
+ // (nothing is selected from this list; the entry point is what gets decompiled below)
+ var exportedFunctions = peFile.ExportedFunctions;
+
+ // Print all exported functions to help us choose a better one
+ Console.WriteLine("Available exported functions:");
+ foreach (var func in exportedFunctions)
+ {
+ Console.WriteLine($" - {func.Name} (RVA=0x{func.AddressRva:X8})");
+ }
+
+ // Announce which address is about to be decompiled
+ Console.WriteLine($"\nDecompiling entry point function at address 0x{peFile.OptionalHeader.AddressOfEntryPoint:X8}\n");
+
// Decompile the entry point function
var function = decompiler.DecompileFunction(peFile.OptionalHeader.AddressOfEntryPoint);
-
+
// Generate pseudocode
- string pseudocode = decompiler.GeneratePseudocode(function);
-
+ var pseudocode = decompiler.GeneratePseudocode(function);
Console.WriteLine("\nGenerated Pseudocode:\n");
Console.WriteLine(pseudocode);
}