0
mirror of https://github.com/sampletext32/ParkanPlayground.git synced 2025-07-01 04:40:25 +03:00

decompiler iter1

This commit is contained in:
bird_egop
2025-04-18 23:46:51 +03:00
parent 0ddbfd2951
commit de2e4312fb
12 changed files with 1486 additions and 140 deletions

View File

@ -0,0 +1,125 @@
namespace X86Disassembler.Analysers;
/// <summary>
/// Shared store for everything the analysers record about a single disassembled function
/// </summary>
public class AnalyzerContext
{
    /// <summary>
    /// The function this context was created for
    /// </summary>
    public AsmFunction Function { get; }

    /// <summary>
    /// Instruction blocks indexed by block address; filled from the function's blocks in the constructor
    /// </summary>
    public Dictionary<ulong, InstructionBlock> BlocksByAddress { get; } = new();

    /// <summary>
    /// Loops indexed by the address of their header block
    /// </summary>
    public Dictionary<ulong, Loop> LoopsByHeaderAddress { get; } = new();

    /// <summary>
    /// For each block address, the loops that contain that block
    /// </summary>
    public Dictionary<ulong, List<Loop>> LoopsByBlockAddress { get; } = new();

    /// <summary>
    /// Free-form analysis results: address -> (key -> value)
    /// </summary>
    public Dictionary<ulong, Dictionary<string, object>> AnalysisDataByAddress { get; } = new();

    /// <summary>
    /// Creates a context for <paramref name="function"/> and indexes the blocks it has at this moment
    /// </summary>
    /// <param name="function">The function to analyze</param>
    public AnalyzerContext(AsmFunction function)
    {
        Function = function;

        // When two blocks share an address, the later one replaces the earlier one
        foreach (var instructionBlock in function.Blocks)
        {
            BlocksByAddress[instructionBlock.Address] = instructionBlock;
        }
    }

    /// <summary>
    /// Describes one loop found in the control flow graph
    /// </summary>
    public class Loop
    {
        /// <summary>
        /// The loop header, i.e. the block through which the loop is entered
        /// </summary>
        public InstructionBlock Header { get; set; } = null!;

        /// <summary>
        /// All blocks that belong to the loop
        /// </summary>
        public List<InstructionBlock> Blocks { get; set; } = new();

        /// <summary>
        /// The edge that closes the loop, running from a block back to the header
        /// </summary>
        public (InstructionBlock From, InstructionBlock To) BackEdge { get; set; }

        /// <summary>
        /// Blocks of the loop that have at least one successor outside the loop
        /// </summary>
        public List<InstructionBlock> ExitBlocks { get; set; } = new();
    }

    /// <summary>
    /// Records a value under <paramref name="key"/> for the given address, replacing any previous value
    /// </summary>
    /// <param name="address">The address the data belongs to</param>
    /// <param name="key">The name under which the data is stored</param>
    /// <param name="value">The data to store</param>
    public void StoreAnalysisData(ulong address, string key, object value)
    {
        // Create the per-address table the first time an address is seen
        if (!AnalysisDataByAddress.TryGetValue(address, out var perAddress))
        {
            AnalysisDataByAddress.Add(address, perAddress = new Dictionary<string, object>());
        }

        perAddress[key] = value;
    }

    /// <summary>
    /// Looks up the value stored under <paramref name="key"/> for the given address
    /// </summary>
    /// <param name="address">The address to look at</param>
    /// <param name="key">The name the data was stored under</param>
    /// <returns>The stored value, or null when the address or the key is unknown</returns>
    public object? GetAnalysisData(ulong address, string key)
    {
        if (!AnalysisDataByAddress.TryGetValue(address, out var perAddress))
        {
            return null;
        }

        return perAddress.TryGetValue(key, out var stored) ? stored : null;
    }

    /// <summary>
    /// Looks up a value and returns it only if it has the requested type
    /// </summary>
    /// <typeparam name="T">The expected type of the stored value</typeparam>
    /// <param name="address">The address to look at</param>
    /// <param name="key">The name the data was stored under</param>
    /// <returns>The stored value, or default(T) when nothing is stored or the stored value is not a T</returns>
    public T? GetAnalysisData<T>(ulong address, string key) =>
        GetAnalysisData(address, key) is T typed ? typed : default;
}

View File

@ -26,16 +26,62 @@ public class AsmFunction
public List<InstructionBlock> ExitBlocks
{
    get
    {
        // A block is an exit block when it is non-empty and its final instruction is a return.
        // The list is rebuilt on every access.
        var exits = new List<InstructionBlock>();
        foreach (var candidate in Blocks)
        {
            if (candidate.Instructions.Count == 0)
            {
                continue;
            }

            if (candidate.Instructions[^1].Type.IsRet())
            {
                exits.Add(candidate);
            }
        }

        return exits;
    }
}
/// <summary>
/// The analyzer context for this function; assigned in the constructor
/// </summary>
public AnalyzerContext Context { get; private set; }
/// <summary>
/// Creates a new AsmFunction instance together with its <see cref="AnalyzerContext"/>
/// </summary>
/// <remarks>
/// NOTE(review): the AnalyzerContext constructor copies this function's Blocks into its
/// BlocksByAddress index right here. If Blocks is filled in after construction (for example
/// through an object initializer), that index will not contain those blocks - confirm against
/// the code that creates AsmFunction instances.
/// </remarks>
public AsmFunction()
{
// The context keeps a reference back to this function
Context = new AnalyzerContext(this);
}
/// <summary>
/// Runs the available analyzers over this function, handing each of them <see cref="Context"/>
/// </summary>
public void Analyze()
{
    // Loop analysis runs first, the data flow pass second
    new LoopAnalyzer().AnalyzeLoops(Context);
    new DataFlowAnalyzer().AnalyzeDataFlow(Context);
}
/// <summary>
/// Returns a string representation of the function, including its address, blocks, and analysis results
/// </summary>
/// <returns>
/// A multi-line description: function address, entry block, exit blocks, block count,
/// one line per detected loop (or "Loops: None"), then every block on its own tab-indented line
/// </returns>
public override string ToString()
{
    // Fully qualified so this method does not rely on a using directive elsewhere in the file
    var text = new System.Text.StringBuilder();

    // A missing entry block is reported as "None" (previously this printed "0xNone")
    string entryBlockText = EntryBlock is { } entryBlock
        ? $"0x{entryBlock.Address:X8}"
        : "None";

    // ExitBlocks is recomputed on every access, so read it only once
    var exitBlocks = ExitBlocks;
    string exitBlocksText = exitBlocks.Count > 0
        ? string.Join(", ", exitBlocks.Select(b => $"0x{b.Address:X8}"))
        : "None";

    text.Append($"Function at 0x{Address:X8}\n");
    text.Append($"Entry Block: {entryBlockText}\n");
    text.Append($"Exit Blocks: {exitBlocksText}\n");
    text.Append($"Total Blocks: {Blocks.Count}\n");

    var loops = Context.LoopsByHeaderAddress.Values;
    if (loops.Count > 0)
    {
        text.Append($"Loops: {loops.Count}\n");

        int i = 0;
        foreach (var loop in loops)
        {
            text.Append($" Loop {i++}: Header=0x{loop.Header.Address:X8}, ");
            text.Append($"Blocks={loop.Blocks.Count}, ");
            text.Append($"Back Edge=(0x{loop.BackEdge.From.Address:X8} -> 0x{loop.BackEdge.To.Address:X8}), ");
            text.Append($"Exits={loop.ExitBlocks.Count}\n");
        }
    }
    else
    {
        text.Append("Loops: None\n");
    }

    // Each block is rendered through its own ToString, one per line, tab-indented
    text.Append(string.Join("\n", Blocks.Select(x => $"\t{x}")));

    return text.ToString();
}
}

View File

@ -1,4 +1,5 @@
using X86Disassembler.X86;
using X86Disassembler.X86.Operands;
namespace X86Disassembler.Analysers;
@ -138,11 +139,45 @@ public class BlockDisassembler
var newBlock = RegisterBlock(blocks, address, instructions, currentBlock, false, false);
blocksByAddress[address] = newBlock;
// Queue the jump target address for processing
addressQueue.Enqueue(jumpTargetAddress);
// Register the target block if it doesn't exist yet
InstructionBlock? jumpTargetBlock = null;
if (blocksByAddress.TryGetValue(jumpTargetAddress, out var existingTargetBlock))
{
jumpTargetBlock = existingTargetBlock;
}
else
{
// We'll create this block later when we process the queue
// For now, just queue it for processing
addressQueue.Enqueue(jumpTargetAddress);
}
// Queue the fall-through address (next instruction after this jump)
addressQueue.Enqueue(fallThroughAddress);
// Register the fall-through block if it doesn't exist yet
InstructionBlock? fallThroughBlock = null;
if (blocksByAddress.TryGetValue(fallThroughAddress, out var existingFallThroughBlock))
{
fallThroughBlock = existingFallThroughBlock;
}
else
{
// We'll create this block later when we process the queue
// For now, just queue it for processing
addressQueue.Enqueue(fallThroughAddress);
}
// If the jump target block exists, add it as a successor to the current block
if (jumpTargetBlock != null)
{
newBlock.Successors.Add(jumpTargetBlock);
jumpTargetBlock.Predecessors.Add(newBlock);
}
// If the fall-through block exists, add it as a successor to the current block
if (fallThroughBlock != null)
{
newBlock.Successors.Add(fallThroughBlock);
fallThroughBlock.Predecessors.Add(newBlock);
}
break;
}
@ -158,8 +193,25 @@ public class BlockDisassembler
var newBlock = RegisterBlock(blocks, address, instructions, currentBlock, false, false);
blocksByAddress[address] = newBlock;
// Queue the jump target address for processing
addressQueue.Enqueue(jumpTargetAddress);
// Register the target block if it doesn't exist yet
InstructionBlock? jumpTargetBlock = null;
if (blocksByAddress.TryGetValue(jumpTargetAddress, out var existingTargetBlock))
{
jumpTargetBlock = existingTargetBlock;
}
else
{
// We'll create this block later when we process the queue
// For now, just queue it for processing
addressQueue.Enqueue(jumpTargetAddress);
}
// If the jump target block exists, add it as a successor to the current block
if (jumpTargetBlock != null)
{
newBlock.Successors.Add(jumpTargetBlock);
jumpTargetBlock.Predecessors.Add(newBlock);
}
break;
}
@ -181,11 +233,201 @@ public class BlockDisassembler
// we need to sort the blocks ourselves
blocks.Sort((b1, b2) => b1.Address.CompareTo(b2.Address));
// Convert all block addresses from file offsets to RVA
// First, establish the successor and predecessor relationships based on file offsets
// This is done by analyzing the last instruction of each block
foreach (var block in blocks)
{
// Convert from file offset to RVA by adding the base address
block.Address += _baseAddress;
if (block.Instructions.Count == 0) continue;
var lastInstruction = block.Instructions[^1];
// Check if the last instruction is a conditional jump
if (lastInstruction.Type.IsConditionalJump())
{
// Get the jump target address (file offset)
ulong targetAddress = 0;
if (lastInstruction.StructuredOperands.Count > 0 && lastInstruction.StructuredOperands[0] is RelativeOffsetOperand relOp)
{
targetAddress = relOp.TargetAddress;
}
// Find the target block
var targetBlock = blocks.FirstOrDefault(b => b.Address == targetAddress);
if (targetBlock != null)
{
// Add the target block as a successor to this block
if (!block.Successors.Contains(targetBlock))
{
block.Successors.Add(targetBlock);
}
// Add this block as a predecessor to the target block
if (!targetBlock.Predecessors.Contains(block))
{
targetBlock.Predecessors.Add(block);
}
// For conditional jumps, also add the fall-through block as a successor
// The fall-through block is the one that immediately follows this block in memory
// Find the next block in address order
var nextBlock = blocks.OrderBy(b => b.Address).FirstOrDefault(b => b.Address > block.Address);
if (nextBlock != null)
{
// The fall-through block is the one that immediately follows this block in memory
var fallThroughBlock = nextBlock;
// Add the fall-through block as a successor to this block
if (!block.Successors.Contains(fallThroughBlock))
{
block.Successors.Add(fallThroughBlock);
}
// Add this block as a predecessor to the fall-through block
if (!fallThroughBlock.Predecessors.Contains(block))
{
fallThroughBlock.Predecessors.Add(block);
}
}
}
}
// Check if the last instruction is an unconditional jump
else if (lastInstruction.Type == InstructionType.Jmp)
{
// Get the jump target address (file offset)
ulong targetAddress = 0;
if (lastInstruction.StructuredOperands.Count > 0 && lastInstruction.StructuredOperands[0] is RelativeOffsetOperand relOp)
{
targetAddress = relOp.TargetAddress;
}
// Find the target block
var targetBlock = blocks.FirstOrDefault(b => b.Address == targetAddress);
if (targetBlock != null)
{
// Add the target block as a successor to this block
if (!block.Successors.Contains(targetBlock))
{
block.Successors.Add(targetBlock);
}
// Add this block as a predecessor to the target block
if (!targetBlock.Predecessors.Contains(block))
{
targetBlock.Predecessors.Add(block);
}
}
}
// For non-jump instructions that don't end the function (like Ret), add the fall-through block
else if (!lastInstruction.Type.IsRet())
{
// The fall-through block is the one that immediately follows this block in memory
// Find the next block in address order
var nextBlock = blocks.OrderBy(b => b.Address).FirstOrDefault(b => b.Address > block.Address);
if (nextBlock != null)
{
// The fall-through block is the one that immediately follows this block in memory
var fallThroughBlock = nextBlock;
// Add the fall-through block as a successor to this block
if (!block.Successors.Contains(fallThroughBlock))
{
block.Successors.Add(fallThroughBlock);
}
// Add this block as a predecessor to the fall-through block
if (!fallThroughBlock.Predecessors.Contains(block))
{
fallThroughBlock.Predecessors.Add(block);
}
}
}
}
// Store the original file offset for each block in a dictionary
Dictionary<InstructionBlock, ulong> blockToFileOffset = new Dictionary<InstructionBlock, ulong>();
foreach (var block in blocks)
{
blockToFileOffset[block] = block.Address;
}
// Convert all block addresses from file offsets to RVA
// and update the block dictionary for quick lookup
Dictionary<ulong, InstructionBlock> rvaBlocksByAddress = new Dictionary<ulong, InstructionBlock>();
Dictionary<ulong, ulong> fileOffsetToRvaMap = new Dictionary<ulong, ulong>();
// First pass: create a mapping from file offset to RVA for each block
foreach (var block in blocks)
{
// Get the original file offset address
ulong blockFileOffset = block.Address;
// Calculate the RVA address
ulong blockRvaAddress = blockFileOffset + _baseAddress;
// Store the mapping
fileOffsetToRvaMap[blockFileOffset] = blockRvaAddress;
}
// Second pass: update all blocks to use RVA addresses
foreach (var block in blocks)
{
// Get the original file offset address
ulong blockFileOffset = block.Address;
// Update the block's address to RVA
ulong blockRvaAddress = fileOffsetToRvaMap[blockFileOffset];
block.Address = blockRvaAddress;
// Add to the dictionary for quick lookup
rvaBlocksByAddress[blockRvaAddress] = block;
}
// Now update all successors and predecessors to use the correct RVA addresses
foreach (var block in blocks)
{
// Create new lists for successors and predecessors with the correct RVA addresses
List<InstructionBlock> updatedSuccessors = new List<InstructionBlock>();
List<InstructionBlock> updatedPredecessors = new List<InstructionBlock>();
// Update successors
foreach (var successor in block.Successors)
{
// Get the original file offset of the successor
if (blockToFileOffset.TryGetValue(successor, out ulong successorFileOffset))
{
// Look up the RVA address in our mapping
if (fileOffsetToRvaMap.TryGetValue(successorFileOffset, out ulong successorRvaAddress))
{
// Find the block with this RVA address
if (rvaBlocksByAddress.TryGetValue(successorRvaAddress, out var rvaSuccessor))
{
updatedSuccessors.Add(rvaSuccessor);
}
}
}
}
// Update predecessors
foreach (var predecessor in block.Predecessors)
{
// Get the original file offset of the predecessor
if (blockToFileOffset.TryGetValue(predecessor, out ulong predecessorFileOffset))
{
// Look up the RVA address in our mapping
if (fileOffsetToRvaMap.TryGetValue(predecessorFileOffset, out ulong predecessorRvaAddress))
{
// Find the block with this RVA address
if (rvaBlocksByAddress.TryGetValue(predecessorRvaAddress, out var rvaPredecessor))
{
updatedPredecessors.Add(rvaPredecessor);
}
}
}
}
// Replace the old lists with the updated ones
block.Successors = updatedSuccessors;
block.Predecessors = updatedPredecessors;
}
// Create a new AsmFunction with the RVA address
@ -246,7 +488,7 @@ public class BlockDisassembler
var block = new InstructionBlock()
{
Address = address,
Instructions = instructions
Instructions = new List<Instruction>(instructions) // Create a copy of the instructions list
};
// Add the block to the collection
@ -261,8 +503,6 @@ public class BlockDisassembler
// Add the current block as a predecessor to the new block
block.Predecessors.Add(currentBlock);
}
// Block created successfully
return block;
}

View File

@ -42,89 +42,61 @@ public class ControlFlowAnalyzer
/// <param name="function">The function to analyze</param>
private void IdentifyIfElseStructures(Function function)
{
// First pass: identify basic if-else structures
// Now analyze each block for conditional jumps
foreach (var block in function.AsmFunction.Blocks)
{
// Skip blocks that don't end with a conditional jump
if (block.Instructions.Count == 0)
{
continue;
}
// Get the last instruction in the block
var lastInstruction = block.Instructions.LastOrDefault();
if (lastInstruction == null) continue;
var lastInstruction = block.Instructions[^1];
// Look for conditional jumps (Jcc instructions)
if (IsConditionalJump(lastInstruction.Type))
// Check if the last instruction is a conditional jump
if (lastInstruction.Type.IsConditionalJump())
{
// This is a potential if-then-else structure
// The true branch is the target of the jump
// The false branch is the fallthrough block
// Get the jump target address
ulong targetAddress = GetJumpTargetAddress(lastInstruction);
// Find the target block
if (_context.BlocksByAddress.TryGetValue(targetAddress, out var targetBlock))
InstructionBlock? targetBlock = null;
foreach (var b in function.AsmFunction.Blocks)
{
// Find the fallthrough block (the block that follows this one in memory)
var fallthroughBlock = FindFallthroughBlock(block);
if (fallthroughBlock != null)
if (b.Address == targetAddress)
{
// Check if the fallthrough block ends with an unconditional jump
// This could indicate an if-else structure where the 'else' branch jumps to a common merge point
InstructionBlock? mergeBlock = null;
bool hasElseBlock = true;
if (fallthroughBlock.Instructions.Count > 0 &&
fallthroughBlock.Instructions[^1].Type == InstructionType.Jmp)
{
// Get the jump target address
ulong mergeAddress = GetJumpTargetAddress(fallthroughBlock.Instructions[^1]);
// Find the merge block
if (_context.BlocksByAddress.TryGetValue(mergeAddress, out var potentialMergeBlock))
{
mergeBlock = potentialMergeBlock;
}
}
// Check if the 'then' block also jumps to the same merge point
if (mergeBlock != null && targetBlock.Instructions.Count > 0 &&
targetBlock.Instructions[^1].Type == InstructionType.Jmp)
{
ulong thenJumpAddress = GetJumpTargetAddress(targetBlock.Instructions[^1]);
if (thenJumpAddress == mergeBlock.Address)
{
// We have a classic if-else structure with a merge point
// Store the if-else structure in the context
var ifElseStructure = new IfElseStructure
{
ConditionBlock = block,
ThenBlock = targetBlock,
ElseBlock = fallthroughBlock,
MergeBlock = mergeBlock,
IsComplete = true // Both branches merge back
};
_context.StoreAnalysisData(block.Address, "IfElseStructure", ifElseStructure);
continue;
}
}
// If we get here, we have a simple if-then or if-then-else without a clear merge point
var simpleIfStructure = new IfElseStructure
{
ConditionBlock = block,
ThenBlock = targetBlock,
ElseBlock = hasElseBlock ? fallthroughBlock : null,
IsComplete = false // No clear merge point
};
_context.StoreAnalysisData(block.Address, "IfElseStructure", simpleIfStructure);
targetBlock = b;
break;
}
}
if (targetBlock == null)
{
continue;
}
// Find the fall-through block (should be in the successors)
InstructionBlock? fallThroughBlock = null;
foreach (var successor in block.Successors)
{
if (successor != targetBlock)
{
fallThroughBlock = successor;
break;
}
}
if (fallThroughBlock == null)
{
continue;
}
// Create an if-else structure
var ifElseStructure = new IfElseStructure
{
ConditionBlock = block,
ThenBlock = targetBlock,
ElseBlock = fallThroughBlock
};
// Store the if-else structure in the analysis context
function.AsmFunction.Context.StoreAnalysisData(block.Address, "IfElseStructure", ifElseStructure);
}
}
@ -195,32 +167,6 @@ public class ControlFlowAnalyzer
}
}
/// <summary>
/// Tells whether an instruction type is one of the conditional jumps recognised by this analyzer
/// </summary>
/// <param name="type">The instruction type to classify</param>
/// <returns>True for the sixteen conditional jump types listed in the body, false for anything else</returns>
private bool IsConditionalJump(InstructionType type) =>
    type is InstructionType.Jz or InstructionType.Jnz
        or InstructionType.Jg or InstructionType.Jge
        or InstructionType.Jl or InstructionType.Jle
        or InstructionType.Ja or InstructionType.Jae
        or InstructionType.Jb or InstructionType.Jbe
        or InstructionType.Jo or InstructionType.Jno
        or InstructionType.Js or InstructionType.Jns
        or InstructionType.Jp or InstructionType.Jnp;
/// <summary>
/// Gets the target address of a jump instruction
/// </summary>
@ -228,39 +174,31 @@ public class ControlFlowAnalyzer
/// <returns>The target address of the jump</returns>
private ulong GetJumpTargetAddress(Instruction instruction)
{
    // Added to a relative operand's target to turn the file offset into an RVA.
    // NOTE(review): BlockDisassembler performs the same conversion with its _baseAddress field;
    // this constant presumably has to match that value - confirm.
    const int SectionRvaOffset = 0x1000;

    // Without operands the target cannot be determined
    if (instruction.StructuredOperands.Count == 0)
    {
        return 0;
    }

    // The jump target is taken from the first operand
    var operand = instruction.StructuredOperands[0];

    if (operand is ImmediateOperand immOp)
    {
        // Immediate operands are used as the target address as-is
        return (ulong)immOp.Value;
    }

    if (operand is RelativeOffsetOperand relOp)
    {
        // The operand already carries the resolved target; only the offset conversion is applied
        return relOp.TargetAddress + SectionRvaOffset;
    }

    // Any other operand kind: the target is unknown, reported as 0
    return 0;
}
/// <summary>
/// Picks the fallthrough successor of a block
/// </summary>
/// <param name="block">The block whose fallthrough successor is wanted</param>
/// <returns>
/// The first successor, in successor-list order, whose address is greater than the block's own
/// address, or null when there is none. Adjacency in memory is not verified.
/// </returns>
private InstructionBlock? FindFallthroughBlock(InstructionBlock block) =>
    block.Successors.FirstOrDefault(candidate => candidate.Address > block.Address);
/// <summary>
/// Checks if the given block is a potential switch statement header
/// </summary>

View File

@ -0,0 +1,384 @@
using X86Disassembler.X86;
using X86Disassembler.X86.Operands;
namespace X86Disassembler.Analysers;
/// <summary>
/// Analyzes data flow through instructions to track register values.
/// The analysis is local to each block: register knowledge starts empty at the top of every
/// block and nothing is carried over from one block to another.
/// </summary>
public class DataFlowAnalyzer
{
    // Key under which the per-instruction register snapshot is stored in the analyzer context
    private const string REGISTER_VALUE_KEY = "RegisterValue";

    // Declared for memory tracking; nothing in this class reads or writes it yet
    private const string MEMORY_VALUE_KEY = "MemoryValue";

    /// <summary>
    /// Represents a known value for a register or memory location
    /// </summary>
    public class ValueInfo
    {
        /// <summary>
        /// The type of value (constant, register, memory, unknown)
        /// </summary>
        public enum ValueType
        {
            Unknown,
            Constant,
            Register,
            Memory
        }

        /// <summary>
        /// The type of this value
        /// </summary>
        public ValueType Type { get; set; } = ValueType.Unknown;

        /// <summary>
        /// The constant value (if Type is Constant)
        /// </summary>
        public ulong? ConstantValue { get; set; }

        /// <summary>
        /// The source register (if Type is Register)
        /// </summary>
        public RegisterIndex? SourceRegister { get; set; }

        /// <summary>
        /// The memory address or expression (if Type is Memory)
        /// </summary>
        public string? MemoryExpression { get; set; }

        /// <summary>
        /// The instruction that defined this value
        /// </summary>
        public Instruction? DefiningInstruction { get; set; }

        /// <summary>
        /// Returns a string representation of the value
        /// </summary>
        public override string ToString()
        {
            return Type switch
            {
                ValueType.Constant => $"0x{ConstantValue:X8}",
                ValueType.Register => $"{SourceRegister}",
                ValueType.Memory => $"[{MemoryExpression}]",
                _ => "unknown"
            };
        }
    }

    /// <summary>
    /// Analyzes data flow in the function and stores results in the analyzer context
    /// </summary>
    /// <param name="context">The analyzer context to read the function from and store results in</param>
    public void AnalyzeDataFlow(AnalyzerContext context)
    {
        foreach (var block in context.Function.Blocks)
        {
            // Register knowledge is tracked per block and starts out empty
            Dictionary<RegisterIndex, ValueInfo> registerValues = new();

            foreach (var instruction in block.Instructions)
            {
                ProcessInstruction(instruction, registerValues, context);

                // Snapshot the state as it is after this instruction has been applied
                StoreRegisterState(instruction.Address, registerValues, context);
            }
        }
    }

    /// <summary>
    /// Processes an instruction to update register values
    /// </summary>
    /// <param name="instruction">The instruction to process</param>
    /// <param name="registerValues">The current register values; updated in place</param>
    /// <param name="context">The analyzer context (currently unused here)</param>
    private void ProcessInstruction(Instruction instruction, Dictionary<RegisterIndex, ValueInfo> registerValues, AnalyzerContext context)
    {
        switch (instruction.Type)
        {
            case InstructionType.Mov:
                ProcessMovInstruction(instruction, registerValues);
                break;

            case InstructionType.Xor:
                ProcessXorInstruction(instruction, registerValues);
                break;

            case InstructionType.Add:
                ProcessAddInstruction(instruction, registerValues);
                break;

            case InstructionType.Sub:
                ProcessSubInstruction(instruction, registerValues);
                break;

            case InstructionType.Pop:
                ProcessPopInstruction(instruction, registerValues);
                break;

            case InstructionType.Call:
                ProcessCallInstruction(instruction, registerValues);
                break;

            default:
                // Conservative fallback for unsupported instructions: whenever the first
                // operand is a register, assume that register no longer has a known value
                if (instruction.StructuredOperands.Count > 0 &&
                    instruction.StructuredOperands[0] is RegisterOperand regOp)
                {
                    registerValues[regOp.Register] = UnknownValue(instruction);
                }

                break;
        }
    }

    /// <summary>
    /// Creates the value used whenever an instruction leaves a register with unknown contents
    /// </summary>
    private static ValueInfo UnknownValue(Instruction instruction) => new()
    {
        Type = ValueInfo.ValueType.Unknown,
        DefiningInstruction = instruction
    };

    /// <summary>
    /// Processes a MOV instruction to update register values
    /// </summary>
    private void ProcessMovInstruction(Instruction instruction, Dictionary<RegisterIndex, ValueInfo> registerValues)
    {
        if (instruction.StructuredOperands.Count < 2)
        {
            return;
        }

        // MOV [mem], reg and MOV [mem], imm do not change any register, so only
        // register destinations are of interest
        if (instruction.StructuredOperands[0] is not RegisterOperand destReg)
        {
            return;
        }

        var src = instruction.StructuredOperands[1];

        // MOV reg, imm
        if (src is ImmediateOperand immSrc)
        {
            registerValues[destReg.Register] = new ValueInfo
            {
                Type = ValueInfo.ValueType.Constant,
                ConstantValue = immSrc.Value,
                DefiningInstruction = instruction
            };
        }
        // MOV reg, reg
        else if (src is RegisterOperand srcReg)
        {
            if (registerValues.TryGetValue(srcReg.Register, out var srcValue))
            {
                // Something is already recorded for the source: copy it, but attribute the
                // definition to this MOV
                registerValues[destReg.Register] = new ValueInfo
                {
                    Type = srcValue.Type,
                    ConstantValue = srcValue.ConstantValue,
                    SourceRegister = srcValue.SourceRegister,
                    MemoryExpression = srcValue.MemoryExpression,
                    DefiningInstruction = instruction
                };
            }
            else
            {
                // Nothing recorded for the source: remember which register the value came from
                registerValues[destReg.Register] = new ValueInfo
                {
                    Type = ValueInfo.ValueType.Register,
                    SourceRegister = srcReg.Register,
                    DefiningInstruction = instruction
                };
            }
        }
        // MOV reg, [mem]
        else if (src is MemoryOperand memSrc)
        {
            registerValues[destReg.Register] = new ValueInfo
            {
                Type = ValueInfo.ValueType.Memory,
                MemoryExpression = memSrc.ToString(),
                DefiningInstruction = instruction
            };
        }
    }

    /// <summary>
    /// Processes an XOR instruction to update register values
    /// </summary>
    private void ProcessXorInstruction(Instruction instruction, Dictionary<RegisterIndex, ValueInfo> registerValues)
    {
        if (instruction.StructuredOperands.Count < 2)
        {
            return;
        }

        if (instruction.StructuredOperands[0] is not RegisterOperand destReg)
        {
            return;
        }

        // XOR reg, same_reg is the zeroing idiom; every other XOR leaves an unknown value
        bool isZeroingIdiom = instruction.StructuredOperands[1] is RegisterOperand srcReg &&
                              destReg.Register == srcReg.Register;

        registerValues[destReg.Register] = isZeroingIdiom
            ? new ValueInfo
            {
                Type = ValueInfo.ValueType.Constant,
                ConstantValue = 0,
                DefiningInstruction = instruction
            }
            : UnknownValue(instruction);
    }

    /// <summary>
    /// Processes an ADD instruction to update register values
    /// </summary>
    private void ProcessAddInstruction(Instruction instruction, Dictionary<RegisterIndex, ValueInfo> registerValues)
    {
        ApplyImmediateArithmetic(instruction, registerValues, (current, immediate) => current + immediate);
    }

    /// <summary>
    /// Processes a SUB instruction to update register values
    /// </summary>
    private void ProcessSubInstruction(Instruction instruction, Dictionary<RegisterIndex, ValueInfo> registerValues)
    {
        ApplyImmediateArithmetic(instruction, registerValues, (current, immediate) => current - immediate);
    }

    /// <summary>
    /// Shared handling for "OP reg, imm" arithmetic: folds the immediate into a known constant,
    /// otherwise marks the destination register as unknown
    /// </summary>
    /// <param name="instruction">The arithmetic instruction</param>
    /// <param name="registerValues">The current register values; updated in place</param>
    /// <param name="operation">Combines the known register constant (first) with the immediate (second)</param>
    private static void ApplyImmediateArithmetic(
        Instruction instruction,
        Dictionary<RegisterIndex, ValueInfo> registerValues,
        Func<ulong, ulong, ulong> operation)
    {
        if (instruction.StructuredOperands.Count < 2)
        {
            return;
        }

        if (instruction.StructuredOperands[0] is not RegisterOperand destReg)
        {
            return;
        }

        if (instruction.StructuredOperands[1] is ImmediateOperand immSrc &&
            registerValues.TryGetValue(destReg.Register, out var destValue) &&
            destValue.Type == ValueInfo.ValueType.Constant &&
            destValue.ConstantValue.HasValue)
        {
            // The result is deliberately truncated to 32 bits so that it wraps around
            // instead of growing past the width of a 32-bit register
            registerValues[destReg.Register] = new ValueInfo
            {
                Type = ValueInfo.ValueType.Constant,
                ConstantValue = unchecked((uint)operation(destValue.ConstantValue.Value, immSrc.Value)),
                DefiningInstruction = instruction
            };
            return;
        }

        // Register or memory source, or a destination without a known constant
        registerValues[destReg.Register] = UnknownValue(instruction);
    }

    /// <summary>
    /// Processes a POP instruction to update register values
    /// </summary>
    private void ProcessPopInstruction(Instruction instruction, Dictionary<RegisterIndex, ValueInfo> registerValues)
    {
        // The popped value comes from the stack, which is not tracked
        if (instruction.StructuredOperands.Count >= 1 &&
            instruction.StructuredOperands[0] is RegisterOperand destReg)
        {
            registerValues[destReg.Register] = UnknownValue(instruction);
        }
    }

    /// <summary>
    /// Processes a CALL instruction to update register values
    /// </summary>
    private void ProcessCallInstruction(Instruction instruction, Dictionary<RegisterIndex, ValueInfo> registerValues)
    {
        // The A, C and D registers are treated as clobbered by the callee; each one gets
        // its own ValueInfo instance
        registerValues[RegisterIndex.A] = UnknownValue(instruction);
        registerValues[RegisterIndex.C] = UnknownValue(instruction);
        registerValues[RegisterIndex.D] = UnknownValue(instruction);
    }

    /// <summary>
    /// Stores the current register state at the given address
    /// </summary>
    private void StoreRegisterState(ulong address, Dictionary<RegisterIndex, ValueInfo> registerValues, AnalyzerContext context)
    {
        // A shallow copy is enough: ValueInfo instances are replaced rather than modified,
        // so later instructions do not alter this snapshot
        var registerValuesCopy = new Dictionary<RegisterIndex, ValueInfo>(registerValues);

        context.StoreAnalysisData(address, REGISTER_VALUE_KEY, registerValuesCopy);
    }

    /// <summary>
    /// Gets the register snapshot stored at the given address, keyed by register
    /// </summary>
    /// <param name="address">The instruction address to look up</param>
    /// <param name="context">The analyzer context that holds the results</param>
    /// <returns>The stored snapshot, or null if nothing was stored for the address</returns>
    public static Dictionary<RegisterIndex, ValueInfo>? GetRegisterState(ulong address, AnalyzerContext context)
    {
        return context.GetAnalysisData<Dictionary<RegisterIndex, ValueInfo>>(address, REGISTER_VALUE_KEY);
    }

    /// <summary>
    /// Gets the register values at the given address
    /// </summary>
    /// <remarks>
    /// Snapshots are stored keyed by <see cref="RegisterIndex"/>. Asking the context directly for
    /// a string-keyed dictionary therefore never matched and always produced null; the stored
    /// snapshot is now converted instead. Keys are the text form of the register index.
    /// </remarks>
    /// <param name="address">The instruction address to look up</param>
    /// <param name="context">The analyzer context that holds the results</param>
    /// <returns>A new dictionary keyed by register name, or null if nothing was stored for the address</returns>
    public static Dictionary<string, ValueInfo>? GetRegisterValues(ulong address, AnalyzerContext context)
    {
        var stored = context.GetAnalysisData(address, REGISTER_VALUE_KEY);

        // Data that already has the requested shape is handed back unchanged
        if (stored is Dictionary<string, ValueInfo> alreadyByName)
        {
            return alreadyByName;
        }

        if (stored is not Dictionary<RegisterIndex, ValueInfo> byRegister)
        {
            return null;
        }

        var byName = new Dictionary<string, ValueInfo>(byRegister.Count);
        foreach (var (register, value) in byRegister)
        {
            byName[$"{register}"] = value;
        }

        return byName;
    }
}

View File

@ -0,0 +1,58 @@
namespace X86Disassembler.Analysers.DecompilerTypes;
/// <summary>
/// Represents a calling convention used by a function
/// </summary>
public enum CallingConvention
{
/// <summary>
/// C declaration calling convention (caller cleans the stack)
/// Parameters are pushed right-to-left
/// EAX, ECX, EDX are caller-saved
/// EBX, ESI, EDI, EBP are callee-saved
/// Return value in EAX (or EDX:EAX for 64-bit values)
/// </summary>
Cdecl,
/// <summary>
/// Standard calling convention (callee cleans the stack)
/// Parameters are pushed right-to-left
/// EAX, ECX, EDX are caller-saved
/// EBX, ESI, EDI, EBP are callee-saved
/// Return value in EAX (or EDX:EAX for 64-bit values)
/// </summary>
Stdcall,
/// <summary>
/// Fast calling convention
/// First two parameters in ECX and EDX, rest on stack right-to-left
/// EAX, ECX, EDX are caller-saved
/// EBX, ESI, EDI, EBP are callee-saved
/// Return value in EAX
/// Callee cleans the stack
/// </summary>
Fastcall,
/// <summary>
/// This calling convention (C++ member functions)
/// 'this' pointer in ECX, other parameters pushed right-to-left
/// EAX, ECX, EDX are caller-saved
/// EBX, ESI, EDI, EBP are callee-saved
/// Return value in EAX
/// Callee cleans the stack (Microsoft __thiscall; variadic member functions
/// use cdecl instead, where the caller cleans the stack)
/// </summary>
Thiscall,
/// <summary>
/// Microsoft vectorcall convention
/// On 32-bit x86: first two integer parameters in ECX and EDX,
/// up to six floating point/vector parameters in XMM0-XMM5
/// (R8 and R9 are used for integer parameters only on x64)
/// Additional parameters pushed right-to-left
/// Return value in EAX or XMM0
/// </summary>
Vectorcall,
/// <summary>
/// Unknown calling convention (not determined)
/// </summary>
Unknown
}

View File

@ -0,0 +1,190 @@
namespace X86Disassembler.Analysers.DecompilerTypes;
/// <summary>
/// Represents a data type in decompiled code
/// </summary>
public class DataType
{
    /// <summary>
    /// The category of the data type
    /// </summary>
    public enum TypeCategory
    {
        /// <summary>
        /// Unknown type
        /// </summary>
        Unknown,

        /// <summary>
        /// Void type (no value)
        /// </summary>
        Void,

        /// <summary>
        /// Integer type
        /// </summary>
        Integer,

        /// <summary>
        /// Floating point type
        /// </summary>
        Float,

        /// <summary>
        /// Pointer type
        /// </summary>
        Pointer,

        /// <summary>
        /// Structure type
        /// </summary>
        Struct,

        /// <summary>
        /// Array type
        /// </summary>
        Array,

        /// <summary>
        /// Function type
        /// </summary>
        Function
    }

    /// <summary>
    /// The name of the type
    /// </summary>
    public string Name { get; set; } = string.Empty;

    /// <summary>
    /// The category of the type
    /// </summary>
    public TypeCategory Category { get; set; }

    /// <summary>
    /// The size of the type in bytes
    /// </summary>
    public int Size { get; set; }

    /// <summary>
    /// Whether the type is signed (for integer types)
    /// </summary>
    public bool IsSigned { get; set; }

    /// <summary>
    /// The pointed-to type (for pointer types)
    /// </summary>
    public DataType? PointedType { get; set; }

    /// <summary>
    /// The element type (for array types)
    /// </summary>
    public DataType? ElementType { get; set; }

    /// <summary>
    /// The number of elements (for array types)
    /// </summary>
    public int? ElementCount { get; set; }

    /// <summary>
    /// The fields of the structure (for struct types)
    /// </summary>
    public List<StructField> Fields { get; set; } = [];

    /// <summary>
    /// Creates a new data type with the specified name and category
    /// </summary>
    /// <param name="name">The name of the type</param>
    /// <param name="category">The category of the type</param>
    /// <param name="size">The size of the type in bytes</param>
    public DataType(string name, TypeCategory category, int size)
    {
        Name = name;
        Category = category;
        Size = size;
    }

    /// <summary>
    /// Returns a string representation of the type (its name)
    /// </summary>
    public override string ToString()
    {
        return Name;
    }

    /// <summary>
    /// Creates a pointer type to the specified type
    /// </summary>
    /// <param name="pointedType">The type being pointed to</param>
    /// <returns>A new pointer type named "&lt;pointed name&gt;*" with a size of 4 bytes</returns>
    /// <exception cref="ArgumentNullException"><paramref name="pointedType"/> is null</exception>
    public static DataType CreatePointerType(DataType pointedType)
    {
        // Fail with a descriptive exception instead of a NullReferenceException on .Name
        ArgumentNullException.ThrowIfNull(pointedType);

        // Pointers are created 4 bytes wide (32-bit)
        return new DataType($"{pointedType.Name}*", TypeCategory.Pointer, 4)
        {
            PointedType = pointedType
        };
    }

    /// <summary>
    /// Creates an array type of the specified element type and count
    /// </summary>
    /// <param name="elementType">The type of the array elements</param>
    /// <param name="count">The number of elements in the array (must not be negative)</param>
    /// <returns>A new array type named "&lt;element name&gt;[count]" whose size is element size times count</returns>
    /// <exception cref="ArgumentNullException"><paramref name="elementType"/> is null</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="count"/> is negative</exception>
    /// <exception cref="OverflowException">The total size does not fit in an <see cref="int"/></exception>
    public static DataType CreateArrayType(DataType elementType, int count)
    {
        ArgumentNullException.ThrowIfNull(elementType);

        // A negative count would silently produce a negative size and a name like "int[-1]"
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(count), count, "Element count must not be negative");
        }

        // checked: surface an overflow instead of storing a wrapped-around size
        int totalSize = checked(elementType.Size * count);

        return new DataType($"{elementType.Name}[{count}]", TypeCategory.Array, totalSize)
        {
            ElementType = elementType,
            ElementCount = count
        };
    }

    // Common predefined types.
    // These are shared instances with public setters: mutating one affects every user of it.

    /// <summary>
    /// Predefined type for values whose type is not known (size 0)
    /// </summary>
    public static readonly DataType Unknown = new DataType("unknown", TypeCategory.Unknown, 0);

    /// <summary>
    /// Predefined void type (size 0)
    /// </summary>
    public static readonly DataType Void = new DataType("void", TypeCategory.Void, 0);

    /// <summary>
    /// Predefined signed 1-byte integer type
    /// </summary>
    public static readonly DataType Char = new DataType("char", TypeCategory.Integer, 1) { IsSigned = true };

    /// <summary>
    /// Predefined unsigned 1-byte integer type
    /// </summary>
    public static readonly DataType UChar = new DataType("unsigned char", TypeCategory.Integer, 1);

    /// <summary>
    /// Predefined signed 2-byte integer type
    /// </summary>
    public static readonly DataType Short = new DataType("short", TypeCategory.Integer, 2) { IsSigned = true };

    /// <summary>
    /// Predefined unsigned 2-byte integer type
    /// </summary>
    public static readonly DataType UShort = new DataType("unsigned short", TypeCategory.Integer, 2);

    /// <summary>
    /// Predefined signed 4-byte integer type
    /// </summary>
    public static readonly DataType Int = new DataType("int", TypeCategory.Integer, 4) { IsSigned = true };

    /// <summary>
    /// Predefined unsigned 4-byte integer type
    /// </summary>
    public static readonly DataType UInt = new DataType("unsigned int", TypeCategory.Integer, 4);

    /// <summary>
    /// Predefined 4-byte floating point type
    /// </summary>
    public static readonly DataType Float = new DataType("float", TypeCategory.Float, 4);

    /// <summary>
    /// Predefined 8-byte floating point type
    /// </summary>
    public static readonly DataType Double = new DataType("double", TypeCategory.Float, 8);
}
/// <summary>
/// Describes a single field of a structure type
/// </summary>
/// <param name="name">The name of the field</param>
/// <param name="type">The type of the field</param>
/// <param name="offset">The offset of the field within the structure</param>
public class StructField(string name, DataType type, int offset)
{
    /// <summary>
    /// The field's name
    /// </summary>
    public string Name { get; set; } = name;

    /// <summary>
    /// The field's data type
    /// </summary>
    public DataType Type { get; set; } = type;

    /// <summary>
    /// Where the field sits inside the structure (offset from the structure start)
    /// </summary>
    public int Offset { get; set; } = offset;
}

View File

@ -0,0 +1,98 @@
using X86Disassembler.X86;
using X86Disassembler.X86.Operands;
namespace X86Disassembler.Analysers.DecompilerTypes;
/// <summary>
/// A function as seen by the decompiler: signature, variables and the underlying assembly
/// </summary>
public class Function
{
    /// <summary>
    /// The function's name
    /// </summary>
    public string Name { get; set; } = string.Empty;

    /// <summary>
    /// The function's address
    /// </summary>
    public ulong Address { get; set; }

    /// <summary>
    /// The type returned by the function (void by default)
    /// </summary>
    public DataType ReturnType { get; set; } = DataType.Void;

    /// <summary>
    /// The function's parameters
    /// </summary>
    public List<Variable> Parameters { get; set; } = [];

    /// <summary>
    /// The function's local variables
    /// </summary>
    public List<Variable> LocalVariables { get; } = [];

    /// <summary>
    /// The variables that live in registers
    /// </summary>
    public List<Variable> RegisterVariables { get; } = [];

    /// <summary>
    /// The function's calling convention (cdecl by default)
    /// </summary>
    public CallingConvention CallingConvention { get; set; } = CallingConvention.Cdecl;

    /// <summary>
    /// The assembly-level representation this function was built from
    /// </summary>
    public AsmFunction AsmFunction { get; set; }

    /// <summary>
    /// Creates a new function with the specified name and address
    /// </summary>
    /// <param name="name">The name of the function</param>
    /// <param name="address">The address of the function</param>
    /// <param name="asmFunction">The assembly function representation</param>
    public Function(string name, ulong address, AsmFunction asmFunction) =>
        (Name, Address, AsmFunction) = (name, address, asmFunction);

    /// <summary>
    /// Runs stack-variable analysis for this function using the context of its assembly function
    /// </summary>
    public void AnalyzeVariables() =>
        new VariableAnalyzer(AsmFunction.Context).AnalyzeStackVariables(this);

    /// <summary>
    /// Builds the signature text in the form "ReturnType Name(Type1 name1, Type2 name2)"
    /// </summary>
    public string GetSignature()
    {
        var renderedParameters =
            from parameter in Parameters
            select $"{parameter.Type} {parameter.Name}";

        return $"{ReturnType} {Name}({string.Join(", ", renderedParameters)})";
    }

    /// <summary>
    /// Returns the function signature
    /// </summary>
    public override string ToString() => GetSignature();
}

View File

@ -0,0 +1,102 @@
namespace X86Disassembler.Analysers.DecompilerTypes;
/// <summary>
/// A variable recovered by the decompiler
/// </summary>
public class Variable
{
    /// <summary>
    /// Where a variable is stored
    /// </summary>
    public enum StorageType
    {
        /// <summary>
        /// Variable stored on the stack (local variable)
        /// </summary>
        Stack,

        /// <summary>
        /// Variable stored in a register
        /// </summary>
        Register,

        /// <summary>
        /// Variable stored in global memory
        /// </summary>
        Global,

        /// <summary>
        /// Function parameter passed on the stack
        /// </summary>
        Parameter,

        /// <summary>
        /// Function parameter passed in a register
        /// </summary>
        RegisterParameter
    }

    /// <summary>
    /// The variable's name
    /// </summary>
    public string Name { get; set; } = string.Empty;

    /// <summary>
    /// The variable's data type
    /// </summary>
    public DataType Type { get; set; } = DataType.Unknown;

    /// <summary>
    /// The kind of storage the variable lives in
    /// </summary>
    public StorageType Storage { get; set; }

    /// <summary>
    /// Offset from the base pointer (stack variables only)
    /// </summary>
    public int? StackOffset { get; set; }

    /// <summary>
    /// The register holding the variable (register variables only)
    /// </summary>
    public X86.RegisterIndex? Register { get; set; }

    /// <summary>
    /// The memory address (global variables only)
    /// </summary>
    public ulong? Address { get; set; }

    /// <summary>
    /// The variable's size in bytes
    /// </summary>
    public int Size { get; set; }

    /// <summary>
    /// True when the variable is a function parameter
    /// </summary>
    public bool IsParameter { get; set; }

    /// <summary>
    /// The index of the parameter (parameters only)
    /// </summary>
    public int? ParameterIndex { get; set; }

    /// <summary>
    /// Creates a new variable with the specified name and type
    /// </summary>
    /// <param name="name">The name of the variable</param>
    /// <param name="type">The type of the variable</param>
    public Variable(string name, DataType type) => (Name, Type) = (name, type);

    /// <summary>
    /// Returns the variable as "Type Name"
    /// </summary>
    public override string ToString() => $"{Type} {Name}";
}

View File

@ -0,0 +1,120 @@
namespace X86Disassembler.Analysers;
/// <summary>
/// Analyzes the control flow graph to identify loops
/// </summary>
public class LoopAnalyzer
{
    /// <summary>
    /// Identifies loops in the given function and stores them in the analyzer context
    /// </summary>
    /// <remarks>
    /// A back edge is properly an edge from a node to one of its dominators. This analyzer uses a
    /// simplified heuristic instead: any edge to a block with a lower address is treated as a back
    /// edge. Loops are registered in <c>LoopsByHeaderAddress</c> by header address, so when several
    /// back edges target the same header, the loop found last replaces the earlier entry there
    /// (all of them remain listed in <c>LoopsByBlockAddress</c>).
    /// </remarks>
    /// <param name="context">The analyzer context to store results in</param>
    public void AnalyzeLoops(AnalyzerContext context)
    {
        foreach (var block in context.Function.Blocks)
        {
            foreach (var successor in block.Successors)
            {
                // Only edges that jump to a lower address are considered back edges
                if (successor.Address >= block.Address)
                {
                    continue;
                }

                // Create a new loop with the identified back edge
                var loop = new AnalyzerContext.Loop
                {
                    Header = successor,
                    BackEdge = (block, successor)
                };

                // Collect the loop body, then the blocks that can leave it
                FindLoopBlocks(loop);
                FindLoopExits(loop);

                // Store the loop in the context
                context.LoopsByHeaderAddress[successor.Address] = loop;

                // Update the blocks-to-loops mapping
                foreach (var loopBlock in loop.Blocks)
                {
                    if (!context.LoopsByBlockAddress.TryGetValue(loopBlock.Address, out var loops))
                    {
                        loops = [];
                        context.LoopsByBlockAddress[loopBlock.Address] = loops;
                    }

                    loops.Add(loop);
                }
            }
        }
    }

    /// <summary>
    /// Finds all blocks that are part of the loop
    /// </summary>
    /// <remarks>
    /// Walks predecessors breadth-first, starting at the source of the back edge and stopping at
    /// the header. The header is added first, followed by the other blocks in visiting order.
    /// </remarks>
    /// <param name="loop">The loop to analyze; its <c>Blocks</c> list is filled in</param>
    private void FindLoopBlocks(AnalyzerContext.Loop loop)
    {
        // Start with the header block
        loop.Blocks.Add(loop.Header);

        // Use a queue for breadth-first search, starting from the back edge source
        var pending = new Queue<InstructionBlock>();
        pending.Enqueue(loop.BackEdge.From);

        // The header is marked as visited up front: the walk never expands it, which is what keeps
        // the header's predecessors from outside the loop out of the result
        var visited = new HashSet<ulong> { loop.Header.Address };

        while (pending.Count > 0)
        {
            var block = pending.Dequeue();

            // If we've already processed this block, skip it
            if (!visited.Add(block.Address))
            {
                continue;
            }

            // Add the block to the loop
            loop.Blocks.Add(block);

            foreach (var predecessor in block.Predecessors)
            {
                // Already-visited blocks would be discarded on dequeue anyway, so don't queue them
                if (!visited.Contains(predecessor.Address))
                {
                    pending.Enqueue(predecessor);
                }
            }
        }
    }

    /// <summary>
    /// Finds all exit blocks of the loop (blocks that have successors outside the loop)
    /// </summary>
    /// <param name="loop">The loop to analyze; its <c>ExitBlocks</c> collection is filled in</param>
    private void FindLoopExits(AnalyzerContext.Loop loop)
    {
        // Hash-based membership test instead of scanning the block list for every successor
        var members = new HashSet<InstructionBlock>(loop.Blocks);

        foreach (var block in loop.Blocks)
        {
            foreach (var successor in block.Successors)
            {
                // If the successor is not part of the loop, this block is an exit
                if (!members.Contains(successor))
                {
                    loop.ExitBlocks.Add(block);
                    break; // One outside successor is enough; record the block only once
                }
            }
        }
    }
}

View File

@ -157,7 +157,39 @@ public class PseudocodeGenerator
// Check if this block ends with a conditional jump
bool hasConditionalJump = block.Instructions.Count > 0 &&
IsConditionalJump(block.Instructions[^1].Type);
IsConditionalJump(block.Instructions[^1].Type);
// Add debug info about conditional jumps
if (hasConditionalJump)
{
var jumpInstruction = block.Instructions[^1];
result.AppendLine($"{new string(' ', indentLevel * 4)}// DEBUG: Conditional jump {jumpInstruction} detected");
// Get the jump target address
ulong targetAddress = GetJumpTargetAddress(jumpInstruction);
result.AppendLine($"{new string(' ', indentLevel * 4)}// DEBUG: Jump target: 0x{targetAddress:X8}");
// Check if we can find a comparison instruction before the jump
Instruction? comparisonInstruction = null;
for (int i = block.Instructions.Count - 2; i >= 0 && i >= block.Instructions.Count - 5; i--)
{
var instruction = block.Instructions[i];
if (instruction.Type == InstructionType.Cmp || instruction.Type == InstructionType.Test)
{
comparisonInstruction = instruction;
break;
}
}
if (comparisonInstruction != null)
{
result.AppendLine($"{new string(' ', indentLevel * 4)}// DEBUG: Found comparison: {comparisonInstruction}");
}
else
{
result.AppendLine($"{new string(' ', indentLevel * 4)}// DEBUG: No comparison instruction found");
}
}
// If this block has a conditional jump but wasn't detected as an if-else structure,
// we'll create an inline if statement for better readability

View File

@ -94,12 +94,25 @@ public class Program
try
{
// Find a suitable exported function to decompile
// Let's try to find a function that might have more complex control flow
var exportedFunctions = peFile.ExportedFunctions;
// Print all exported functions to help us choose a better one
Console.WriteLine("Available exported functions:");
foreach (var func in exportedFunctions)
{
Console.WriteLine($" - {func.Name} (RVA=0x{func.AddressRva:X8})");
}
// Decompile the entry point function
Console.WriteLine($"\nDecompiling entry point function at address 0x{peFile.OptionalHeader.AddressOfEntryPoint:X8}\n");
// Decompile the entry point function
var function = decompiler.DecompileFunction(peFile.OptionalHeader.AddressOfEntryPoint);
// Generate pseudocode
string pseudocode = decompiler.GeneratePseudocode(function);
var pseudocode = decompiler.GeneratePseudocode(function);
Console.WriteLine("\nGenerated Pseudocode:\n");
Console.WriteLine(pseudocode);
}