0
mirror of https://github.com/sampletext32/ParkanPlayground.git synced 2025-07-01 04:40:25 +03:00

Fix address conversion in BlockDisassembler to properly handle RVA addresses and ensure entry blocks are correctly identified

This commit is contained in:
bird_egop
2025-04-18 21:34:35 +03:00
parent 7eead316cd
commit c7fd962d90
7 changed files with 1384 additions and 25 deletions

View File

@ -1,13 +1,41 @@
namespace X86Disassembler.Analysers; namespace X86Disassembler.Analysers;
/// <summary>
/// Represents a disassembled function with its control flow graph
/// </summary>
public class AsmFunction public class AsmFunction
{ {
/// <summary>
/// The starting address of the function
/// </summary>
public ulong Address { get; set; } public ulong Address { get; set; }
public List<InstructionBlock> Blocks { get; set; } /// <summary>
/// The list of basic blocks that make up the function
/// </summary>
public List<InstructionBlock> Blocks { get; set; } = [];
/// <summary>
/// The entry block of the function
/// </summary>
public InstructionBlock? EntryBlock => Blocks.FirstOrDefault(b => b.Address == Address);
/// <summary>
/// The exit blocks of the function (blocks that end with a return instruction)
/// </summary>
public List<InstructionBlock> ExitBlocks => Blocks.Where(b =>
b.Instructions.Count > 0 &&
b.Instructions[^1].Type.IsRet()).ToList();
/// <summary>
/// Returns a string representation of the function, including its address and blocks
/// </summary>
public override string ToString() public override string ToString()
{ {
return $"Function at {Address:X8}\n{string.Join("\n", Blocks.Select(x => $"\t{x}"))}"; return $"Function at 0x{Address:X8}\n" +
$"Entry Block: 0x{EntryBlock?.Address.ToString("X8") ?? "None"}\n" +
$"Exit Blocks: {(ExitBlocks.Count > 0 ? string.Join(", ", ExitBlocks.Select(b => $"0x{b.Address:X8}")) : "None")}\n" +
$"Total Blocks: {Blocks.Count}\n" +
$"{string.Join("\n", Blocks.Select(x => $"\t{x}"))}";
} }
} }

View File

@ -1,4 +1,4 @@
using X86Disassembler.X86; using X86Disassembler.X86;
namespace X86Disassembler.Analysers; namespace X86Disassembler.Analysers;
@ -47,11 +47,21 @@ public class BlockDisassembler
// Queue of addresses to process (breadth-first approach) // Queue of addresses to process (breadth-first approach)
Queue<ulong> addressQueue = []; Queue<ulong> addressQueue = [];
// Calculate the file offset from the RVA by subtracting the base address // Calculate the file offset from the RVA by subtracting the base address
addressQueue.Enqueue(rvaAddress - _baseAddress); // Store the file offset for processing, but we'll convert back to RVA when creating blocks
ulong fileOffset = rvaAddress - _baseAddress;
addressQueue.Enqueue(fileOffset);
// Keep track of the original entry point RVA for the function
ulong entryPointRVA = rvaAddress;
// List to store discovered basic blocks // List to store discovered basic blocks
List<InstructionBlock> blocks = []; List<InstructionBlock> blocks = [];
// Dictionary to track blocks by address for quick lookup
Dictionary<ulong, InstructionBlock> blocksByAddress = new Dictionary<ulong, InstructionBlock>();
while (addressQueue.Count > 0) while (addressQueue.Count > 0)
{ {
// Get the next address to process // Get the next address to process
@ -69,16 +79,36 @@ public class BlockDisassembler
// Collect instructions for this block // Collect instructions for this block
List<Instruction> instructions = []; List<Instruction> instructions = [];
// Get the current block if it exists (for tracking predecessors)
InstructionBlock? currentBlock = null;
if (blocksByAddress.TryGetValue(address, out var existingBlock))
{
currentBlock = existingBlock;
}
// Process instructions until we hit a control flow change // Process instructions until we hit a control flow change
while (true) while (true)
{ {
// Get the current position
ulong currentPosition = (ulong)decoder.GetPosition();
// If we've stepped onto an existing block, create a new block up to this point // If we've stepped onto an existing block, create a new block up to this point
// and stop processing this path (to avoid duplicating instructions) // and stop processing this path (to avoid duplicating instructions)
if (blocks.Any(x => x.Address == (ulong) decoder.GetPosition())) if (blocksByAddress.TryGetValue(currentPosition, out var targetBlock) && currentPosition != address)
{ {
Console.WriteLine("Stepped on to existing block. Creating in the middle"); Console.WriteLine("Stepped on to existing block. Creating in the middle");
RegisterBlock(blocks, address, instructions);
// Register this block and establish the relationship with the target block
var newBlock = RegisterBlock(blocks, address, instructions, null, false, false);
blocksByAddress[address] = newBlock;
// Add the target block as a successor to the new block
newBlock.Successors.Add(targetBlock);
// Add the new block as a predecessor to the target block
targetBlock.Predecessors.Add(newBlock);
break; break;
} }
@ -98,17 +128,22 @@ public class BlockDisassembler
// For conditional jumps, we need to follow both the jump target and the fall-through path // For conditional jumps, we need to follow both the jump target and the fall-through path
if (instruction.Type.IsConditionalJump()) if (instruction.Type.IsConditionalJump())
{ {
// Get the jump target address
uint jumpTargetAddress = instruction.StructuredOperands[0].GetValue();
// Get the fall-through address (next instruction after this jump)
uint fallThroughAddress = (uint)decoder.GetPosition();
// Register this block (it ends with a conditional jump) // Register this block (it ends with a conditional jump)
RegisterBlock(blocks, address, instructions); var newBlock = RegisterBlock(blocks, address, instructions, currentBlock, false, false);
blocksByAddress[address] = newBlock;
// Queue the jump target address for processing // Queue the jump target address for processing
addressQueue.Enqueue( addressQueue.Enqueue(jumpTargetAddress);
instruction.StructuredOperands[0]
.GetValue()
);
// Queue the fall-through address (next instruction after this jump) // Queue the fall-through address (next instruction after this jump)
addressQueue.Enqueue((uint) decoder.GetPosition()); addressQueue.Enqueue(fallThroughAddress);
break; break;
} }
@ -116,14 +151,16 @@ public class BlockDisassembler
// For unconditional jumps, we only follow the jump target // For unconditional jumps, we only follow the jump target
if (instruction.Type.IsRegularJump()) if (instruction.Type.IsRegularJump())
{ {
// Get the jump target address
uint jumpTargetAddress = instruction.StructuredOperands[0].GetValue();
// Register this block (it ends with an unconditional jump) // Register this block (it ends with an unconditional jump)
RegisterBlock(blocks, address, instructions); var newBlock = RegisterBlock(blocks, address, instructions, currentBlock, false, false);
blocksByAddress[address] = newBlock;
// Queue the jump target address for processing // Queue the jump target address for processing
addressQueue.Enqueue( addressQueue.Enqueue(jumpTargetAddress);
instruction.StructuredOperands[0]
.GetValue()
);
break; break;
} }
@ -132,7 +169,9 @@ public class BlockDisassembler
if (instruction.Type.IsRet()) if (instruction.Type.IsRet())
{ {
// Register this block (it ends with a return) // Register this block (it ends with a return)
RegisterBlock(blocks, address, instructions); var newBlock = RegisterBlock(blocks, address, instructions, currentBlock, false, false);
blocksByAddress[address] = newBlock;
break; break;
} }
} }
@ -142,11 +181,41 @@ public class BlockDisassembler
// we need to sort the blocks ourselves // we need to sort the blocks ourselves
blocks.Sort((b1, b2) => b1.Address.CompareTo(b2.Address)); blocks.Sort((b1, b2) => b1.Address.CompareTo(b2.Address));
return new AsmFunction() // Convert all block addresses from file offsets to RVA
foreach (var block in blocks)
{ {
Address = rvaAddress, // Convert from file offset to RVA by adding the base address
ulong rvaBlockAddress = block.Address + _baseAddress;
Console.WriteLine($"Converting block address from file offset 0x{block.Address:X8} to RVA 0x{rvaBlockAddress:X8}");
block.Address = rvaBlockAddress;
}
// Create a new AsmFunction with the RVA address
var asmFunction = new AsmFunction()
{
Address = entryPointRVA,
Blocks = blocks, Blocks = blocks,
}; };
// Verify that the entry block exists
var entryBlock = asmFunction.EntryBlock;
if (entryBlock == null)
{
Console.WriteLine($"Warning: No entry block found at RVA 0x{entryPointRVA:X8}");
// Try to find a block at the file offset address (for backward compatibility)
var fallbackBlock = blocks.FirstOrDefault(b => b.Address == (fileOffset + _baseAddress));
if (fallbackBlock != null)
{
Console.WriteLine($"Found fallback entry block at RVA 0x{fallbackBlock.Address:X8}");
}
}
else
{
Console.WriteLine($"Found entry block at RVA 0x{entryBlock.Address:X8}");
}
return asmFunction;
} }
/// <summary> /// <summary>
@ -155,8 +224,42 @@ public class BlockDisassembler
/// <param name="blocks">The list of blocks to add to</param> /// <param name="blocks">The list of blocks to add to</param>
/// <param name="address">The starting address of the block</param> /// <param name="address">The starting address of the block</param>
/// <param name="instructions">The instructions contained in the block</param> /// <param name="instructions">The instructions contained in the block</param>
public void RegisterBlock(List<InstructionBlock> blocks, ulong address, List<Instruction> instructions) /// <param name="currentBlock">The current block being processed (null if this is the first block)</param>
/// <param name="isJumpTarget">Whether this block is a jump target</param>
/// <param name="isFallThrough">Whether this block is a fall-through from another block</param>
/// <returns>The newly created block</returns>
public InstructionBlock RegisterBlock(
List<InstructionBlock> blocks,
ulong address,
List<Instruction> instructions,
InstructionBlock? currentBlock = null,
bool isJumpTarget = false,
bool isFallThrough = false)
{ {
// Check if a block already exists at this address
var existingBlock = blocks.FirstOrDefault(b => b.Address == address);
if (existingBlock != null)
{
// If the current block is not null, update the relationships
if (currentBlock != null)
{
// Add the existing block as a successor to the current block if not already present
if (!currentBlock.Successors.Contains(existingBlock))
{
currentBlock.Successors.Add(existingBlock);
}
// Add the current block as a predecessor to the existing block if not already present
if (!existingBlock.Predecessors.Contains(currentBlock))
{
existingBlock.Predecessors.Add(currentBlock);
}
}
return existingBlock;
}
// Create a new block with the provided address and instructions // Create a new block with the provided address and instructions
var block = new InstructionBlock() var block = new InstructionBlock()
{ {
@ -166,9 +269,21 @@ public class BlockDisassembler
// Add the block to the collection // Add the block to the collection
blocks.Add(block); blocks.Add(block);
// If the current block is not null, update the relationships
if (currentBlock != null)
{
// Add the new block as a successor to the current block
currentBlock.Successors.Add(block);
// Add the current block as a predecessor to the new block
block.Predecessors.Add(currentBlock);
}
// Log the created block for debugging // Log the created block for debugging
Console.WriteLine($"Created block:\n{block}"); Console.WriteLine($"Created block:\n{block}");
return block;
} }
} }
@ -185,13 +300,34 @@ public class InstructionBlock
/// <summary> /// <summary>
/// The list of instructions contained in this block /// The list of instructions contained in this block
/// </summary> /// </summary>
public List<Instruction> Instructions { get; set; } public List<Instruction> Instructions { get; set; } = [];
/// <summary> /// <summary>
/// Returns a string representation of the block, including its address and instructions /// The blocks that can transfer control to this block
/// </summary>
public List<InstructionBlock> Predecessors { get; set; } = [];
/// <summary>
/// The blocks that this block can transfer control to
/// </summary>
public List<InstructionBlock> Successors { get; set; } = [];
/// <summary>
/// Returns a string representation of the block, including its address, instructions, and control flow information
/// </summary> /// </summary>
public override string ToString() public override string ToString()
{ {
return $"Address: {Address:X8}\n{string.Join("\n", Instructions)}"; // Create a string for predecessors
string predecessorsStr = Predecessors.Count > 0
? $"Predecessors: {string.Join(", ", Predecessors.Select(p => $"0x{p.Address:X8}"))}"
: "No predecessors";
// Create a string for successors
string successorsStr = Successors.Count > 0
? $"Successors: {string.Join(", ", Successors.Select(s => $"0x{s.Address:X8}"))}"
: "No successors";
// Return the complete string representation
return $"Address: 0x{Address:X8}\n{predecessorsStr}\n{successorsStr}\n{string.Join("\n", Instructions)}";
} }
} }

View File

@ -0,0 +1,277 @@
using X86Disassembler.Analysers.DecompilerTypes;
using X86Disassembler.X86;
using X86Disassembler.X86.Operands;
namespace X86Disassembler.Analysers;
/// <summary>
/// Analyzes control flow structures in disassembled code
/// </summary>
public class ControlFlowAnalyzer
{
/// <summary>
/// The analyzer context
/// </summary>
private readonly AnalyzerContext _context;
/// <summary>
/// Creates a new control flow analyzer bound to the given analysis context
/// </summary>
/// <param name="context">
/// The analyzer context; it is stored and later used to look up blocks by address
/// and to record the structures that the analysis detects
/// </param>
public ControlFlowAnalyzer(AnalyzerContext context)
{
_context = context;
}
/// <summary>
/// Analyzes the control flow of a function to identify high-level structures.
/// Runs the if-else detection pass followed by the switch detection pass; both passes
/// store their findings in the analyzer context rather than returning them.
/// </summary>
/// <param name="function">The function to analyze</param>
public void AnalyzeControlFlow(Function function)
{
// First, identify if-else structures
IdentifyIfElseStructures(function);
// Then, identify switch statements
IdentifySwitchStatements(function);
}
/// <summary>
/// Scans every block of the function and records an if-else structure for each block
/// that ends in a conditional jump whose target block and fallthrough block can both be resolved
/// </summary>
/// <param name="function">The function whose blocks are inspected</param>
private void IdentifyIfElseStructures(Function function)
{
    foreach (var candidate in function.AsmFunction.Blocks)
    {
        // An empty block has no terminating instruction to inspect
        if (candidate.Instructions.Count == 0)
        {
            continue;
        }

        // Only blocks terminated by a Jcc instruction can head an if-else
        var terminator = candidate.Instructions[^1];
        if (!IsConditionalJump(terminator.Type))
        {
            continue;
        }

        // The 'then' branch is the jump target; it must be a known block
        ulong jumpTarget = GetJumpTargetAddress(terminator);
        if (!_context.BlocksByAddress.TryGetValue(jumpTarget, out var thenBlock))
        {
            continue;
        }

        // The 'else' branch is the block reached when the jump is not taken
        var elseBlock = FindFallthroughBlock(candidate);
        if (elseBlock == null)
        {
            continue;
        }

        // Record the structure under the address of the condition block
        _context.StoreAnalysisData(candidate.Address, "IfElseStructure", new IfElseStructure
        {
            ConditionBlock = candidate,
            ThenBlock = thenBlock,
            ElseBlock = elseBlock
        });
    }
}
/// <summary>
/// Scans every block of the function and records a switch structure for each block
/// that <c>IsPotentialSwitchHeader</c> accepts, using the block's successors as the cases
/// </summary>
/// <param name="function">The function whose blocks are inspected</param>
private void IdentifySwitchStatements(Function function)
{
    foreach (var candidate in function.AsmFunction.Blocks)
    {
        // Skip anything that does not look like a switch header
        if (!IsPotentialSwitchHeader(candidate))
        {
            continue;
        }

        var detected = new SwitchStructure
        {
            HeaderBlock = candidate,
            Cases = []
        };

        // Every successor of the header is treated as one case
        foreach (var target in candidate.Successors)
        {
            var switchCase = new SwitchCase
            {
                CaseBlock = target,
                // Placeholder: the real case value is not derived by this analysis
                Value = 0
            };
            detected.Cases.Add(switchCase);
        }

        // Record the structure under the address of the header block
        _context.StoreAnalysisData(candidate.Address, "SwitchStructure", detected);
    }
}
/// <summary>
/// Tells whether an instruction type is one of the conditional jumps this analyzer recognizes
/// (Jz, Jnz, Jg, Jge, Jl, Jle, Ja, Jae, Jb, Jbe, Jo, Jno, Js, Jns, Jp, Jnp)
/// </summary>
/// <param name="type">The instruction type to classify</param>
/// <returns>True if the type is in the recognized set, false otherwise</returns>
private bool IsConditionalJump(InstructionType type)
{
    return type is InstructionType.Jz
        or InstructionType.Jnz
        or InstructionType.Jg
        or InstructionType.Jge
        or InstructionType.Jl
        or InstructionType.Jle
        or InstructionType.Ja
        or InstructionType.Jae
        or InstructionType.Jb
        or InstructionType.Jbe
        or InstructionType.Jo
        or InstructionType.Jno
        or InstructionType.Js
        or InstructionType.Jns
        or InstructionType.Jp
        or InstructionType.Jnp;
}
/// <summary>
/// Extracts the target address of a jump from its first operand
/// </summary>
/// <param name="instruction">The jump instruction</param>
/// <returns>
/// The value of the first operand when it is an immediate operand;
/// 0 when the instruction has no operands or the first operand is not an immediate
/// </returns>
private ulong GetJumpTargetAddress(Instruction instruction)
{
    var operands = instruction.StructuredOperands;

    // No operands at all: the target cannot be determined
    if (operands.Count == 0)
    {
        return 0;
    }

    // Only an immediate first operand carries a directly readable target
    if (operands[0] is not ImmediateOperand immediate)
    {
        return 0;
    }

    return (ulong)immediate.Value;
}
/// <summary>
/// Finds the fallthrough block for a given block: the successor that follows it in memory
/// </summary>
/// <remarks>
/// The fallthrough block starts directly after this block's last instruction, so among the
/// successors located at a higher address it is the one with the lowest address. Returning
/// merely the first higher-addressed successor (in list order) would pick the jump target
/// instead whenever the block ends in a forward jump and the target is listed first.
/// NOTE(review): if the real fallthrough block was never linked as a successor, the nearest
/// higher-addressed successor may still be a jump target - confirm against how successors are populated.
/// </remarks>
/// <param name="block">The block to find the fallthrough for</param>
/// <returns>The nearest successor at a higher address, or null if there is none</returns>
private InstructionBlock? FindFallthroughBlock(InstructionBlock block)
{
    InstructionBlock? nearest = null;

    foreach (var successor in block.Successors)
    {
        // Successors at or before this block are backward jumps (or self loops), never a fallthrough
        if (successor.Address <= block.Address)
        {
            continue;
        }

        // Keep the candidate closest to this block
        if (nearest == null || successor.Address < nearest.Address)
        {
            nearest = successor;
        }
    }

    return nearest;
}
/// <summary>
/// Decides whether a block looks like the header of a switch statement
/// </summary>
/// <param name="block">The block to check</param>
/// <returns>
/// True when the block has more than two successors and its last instruction is a Jmp
/// whose first operand is not an immediate (an indirect jump); false otherwise
/// </returns>
private bool IsPotentialSwitchHeader(InstructionBlock block)
{
    // Two or fewer successors is an ordinary branch, and an empty block has no terminator
    bool hasManySuccessors = block.Successors.Count > 2;
    if (!hasManySuccessors || block.Instructions.Count == 0)
    {
        return false;
    }

    var terminator = block.Instructions[^1];

    // An indirect jump: Jmp with a non-immediate first operand
    return terminator.Type == InstructionType.Jmp
        && terminator.StructuredOperands.Count > 0
        && terminator.StructuredOperands[0] is not ImmediateOperand;
}
/// <summary>
/// Represents an if-else structure in the control flow graph
/// </summary>
public class IfElseStructure
{
/// <summary>
/// The block containing the condition (the block that ends with the conditional jump)
/// </summary>
public InstructionBlock ConditionBlock { get; set; } = null!;
/// <summary>
/// The block containing the 'then' branch (the target of the conditional jump)
/// </summary>
public InstructionBlock ThenBlock { get; set; } = null!;
/// <summary>
/// The block containing the 'else' branch (the fallthrough block).
/// The property is declared non-nullable and the analyzer in this file only creates
/// the structure when a fallthrough block was found.
/// NOTE(review): if pure if-then structures without an else block are ever stored, this should become nullable.
/// </summary>
public InstructionBlock ElseBlock { get; set; } = null!;
}
/// <summary>
/// Represents a switch statement in the control flow graph
/// </summary>
public class SwitchStructure
{
/// <summary>
/// The block containing the switch header (the block that dispatches to the cases)
/// </summary>
public InstructionBlock HeaderBlock { get; set; } = null!;
/// <summary>
/// The cases of the switch statement; starts out as an empty list
/// </summary>
public List<SwitchCase> Cases { get; set; } = [];
}
/// <summary>
/// Represents a case in a switch statement
/// </summary>
public class SwitchCase
{
/// <summary>
/// The value of the case.
/// The switch detection in this file always stores 0 here as a placeholder.
/// </summary>
public int Value { get; set; }
/// <summary>
/// The block containing the case code
/// </summary>
public InstructionBlock CaseBlock { get; set; } = null!;
}
}

View File

@ -0,0 +1,149 @@
using X86Disassembler.Analysers.DecompilerTypes;
using X86Disassembler.PE;
using X86Disassembler.X86;
namespace X86Disassembler.Analysers;
/// <summary>
/// Main engine for decompiling x86 code
/// </summary>
public class DecompilerEngine
{
/// <summary>
/// The PE file being analyzed
/// </summary>
private readonly PeFile _peFile;
/// <summary>
/// Dictionary of analyzed functions by address
/// </summary>
private readonly Dictionary<ulong, Function> _functions = [];
/// <summary>
/// Dictionary of exported function names by address
/// </summary>
private readonly Dictionary<ulong, string> _exportedFunctions = [];
/// <summary>
/// Creates a new decompiler engine for the specified PE file and indexes
/// the names of its exported functions by their RVA
/// </summary>
/// <param name="peFile">The PE file to decompile</param>
public DecompilerEngine(PeFile peFile)
{
_peFile = peFile;
// Initialize the exported functions dictionary
// (indexer assignment: if two exports share an RVA, the later name replaces the earlier one)
foreach (var export in peFile.ExportedFunctions)
{
_exportedFunctions[export.AddressRva] = export.Name;
}
}
/// <summary>
/// Decompiles the function at the given address, or returns the cached result
/// when that address has been decompiled before
/// </summary>
/// <param name="address">The address of the function; it is matched against the sections' virtual address ranges</param>
/// <returns>The decompiled function</returns>
/// <exception cref="InvalidOperationException">No code section contains the address</exception>
public Function DecompileFunction(ulong address)
{
    // Serve repeated requests from the cache
    if (_functions.TryGetValue(address, out var cached))
    {
        return cached;
    }

    // Locate the code section whose virtual address range covers the address
    var owningSection = _peFile.SectionHeaders.Find(section =>
        section.ContainsCode() &&
        address >= section.VirtualAddress &&
        address < section.VirtualAddress + section.VirtualSize);
    if (owningSection == null)
    {
        throw new InvalidOperationException($"No code section found containing address 0x{address:X8}");
    }

    // Load the raw bytes of that section
    byte[] sectionBytes = _peFile.GetSectionData(_peFile.SectionHeaders.IndexOf(owningSection));

    // Disassemble the function into basic blocks
    var blockDisassembler = new BlockDisassembler(sectionBytes, owningSection.VirtualAddress);
    var disassembled = blockDisassembler.DisassembleFromAddress((uint)address);

    // Run the context-level analyses; loop analysis goes before data flow analysis
    var context = new AnalyzerContext(disassembled);
    new LoopAnalyzer().AnalyzeLoops(context);
    new DataFlowAnalyzer().AnalyzeDataFlow(context);

    // Prefer the exported name; otherwise synthesize one from the address
    if (!_exportedFunctions.TryGetValue(address, out var functionName))
    {
        functionName = $"func_{address:X8}";
    }

    // Build the function model, then detect control flow structures on it
    var result = new FunctionAnalyzer(context).AnalyzeFunction(address, functionName);
    new ControlFlowAnalyzer(context).AnalyzeControlFlow(result);

    // Remember the result for later calls
    _functions[address] = result;
    return result;
}
/// <summary>
/// Produces C-like pseudocode for a decompiled function by delegating
/// to a freshly created <see cref="PseudocodeGenerator"/>
/// </summary>
/// <param name="function">The function to generate pseudocode for</param>
/// <returns>The generated pseudocode</returns>
public string GeneratePseudocode(Function function) =>
    new PseudocodeGenerator().GeneratePseudocode(function);
/// <summary>
/// Decompiles every non-forwarded export of the PE file.
/// A failure on one export is logged to the console and does not stop the remaining exports.
/// </summary>
/// <returns>The engine's function cache, keyed by address</returns>
public Dictionary<ulong, Function> DecompileAllExportedFunctions()
{
    foreach (var exported in _peFile.ExportedFunctions)
    {
        // Forwarded exports are skipped
        if (!exported.IsForwarder)
        {
            try
            {
                DecompileFunction(exported.AddressRva);
            }
            catch (Exception ex)
            {
                // Best effort: report the failure and move on to the next export
                Console.WriteLine($"Error decompiling function {exported.Name} at 0x{exported.AddressRva:X8}: {ex.Message}");
            }
        }
    }

    return _functions;
}
}

View File

@ -0,0 +1,132 @@
using X86Disassembler.Analysers.DecompilerTypes;
using X86Disassembler.X86;
using X86Disassembler.X86.Operands;
namespace X86Disassembler.Analysers;
/// <summary>
/// Analyzes disassembled functions to identify variables, parameters, and control flow structures
/// </summary>
public class FunctionAnalyzer
{
/// <summary>
/// The analyzer context
/// </summary>
private readonly AnalyzerContext _context;
/// <summary>
/// Creates a new function analyzer bound to the given analysis context
/// </summary>
/// <param name="context">
/// The analyzer context; it is stored and later supplies the disassembled function
/// and is handed on to the variable analyzer
/// </param>
public FunctionAnalyzer(AnalyzerContext context)
{
_context = context;
}
/// <summary>
/// Builds the function model for the given address: analyzes stack variables,
/// determines the calling convention and infers types
/// </summary>
/// <param name="address">The address of the function</param>
/// <param name="name">The name of the function; when null or empty, "func_" plus the hexadecimal address is used</param>
/// <returns>The analyzed function</returns>
public Function AnalyzeFunction(ulong address, string name = "")
{
    // Fall back to an address-based name when none was supplied
    string effectiveName = string.IsNullOrEmpty(name) ? $"func_{address:X8}" : name;

    // Start from an unknown return type; type inference replaces it below
    var analyzed = new Function(effectiveName, address, _context.Function);
    analyzed.ReturnType = DataType.Unknown;

    // Stack variables first, then calling convention, then types
    new VariableAnalyzer(_context).AnalyzeStackVariables(analyzed);
    DetermineCallingConvention(analyzed);
    InferTypes(analyzed);

    return analyzed;
}
/// <summary>
/// Determines the calling convention of a function based on how it returns
/// </summary>
/// <remarks>
/// The function is classified as stdcall when any of its exit blocks ends in a return
/// instruction carrying a positive immediate operand (the callee pops its own arguments);
/// otherwise it is assumed to be cdecl. Exit blocks are taken from
/// <c>AsmFunction.ExitBlocks</c> so that this analysis uses the same definition of a
/// return block as the rest of the code, instead of building a separate, unused list.
/// Thiscall and fastcall are not detected; both would require register usage analysis.
/// </remarks>
/// <param name="function">The function to analyze</param>
private void DetermineCallingConvention(Function function)
{
    bool cleansOwnStack = false;

    // ExitBlocks only yields non-empty blocks whose last instruction is a return
    foreach (var exitBlock in function.AsmFunction.ExitBlocks)
    {
        var returnInstruction = exitBlock.Instructions[^1];

        // "ret imm" releases imm bytes of arguments: the callee cleans the stack
        if (returnInstruction.StructuredOperands.Count > 0 &&
            returnInstruction.StructuredOperands[0] is ImmediateOperand immOp &&
            immOp.Value > 0)
        {
            cleansOwnStack = true;
            break;
        }
    }

    // Callee cleanup suggests stdcall; cdecl is the default assumption
    function.CallingConvention = cleansOwnStack
        ? CallingConvention.Stdcall
        : CallingConvention.Cdecl;
}
/// <summary>
/// Assigns types to the return value, the parameters and the local variables.
/// No usage analysis is performed yet: everything is set to <c>DataType.Int</c>.
/// </summary>
/// <param name="function">The function whose types are assigned</param>
private void InferTypes(Function function)
{
    // Placeholder inference: the return value is treated as an int
    function.ReturnType = DataType.Int;

    // Placeholder inference: every parameter is an int
    foreach (var parameter in function.Parameters)
    {
        parameter.Type = DataType.Int;
    }

    // Placeholder inference: every local variable is an int
    foreach (var local in function.LocalVariables)
    {
        local.Type = DataType.Int;
    }
}
}

View File

@ -0,0 +1,385 @@
using System.Text;
using X86Disassembler.Analysers.DecompilerTypes;
using X86Disassembler.X86;
using X86Disassembler.X86.Operands;
namespace X86Disassembler.Analysers;
/// <summary>
/// Generates C-like pseudocode from decompiled functions
/// </summary>
public class PseudocodeGenerator
{
/// <summary>
/// Renders a decompiled function as C-like pseudocode: signature, variable declarations,
/// the generated body and a placeholder return statement
/// </summary>
/// <param name="function">The function to generate pseudocode for</param>
/// <returns>The generated pseudocode</returns>
public string GeneratePseudocode(Function function)
{
    var output = new StringBuilder();

    // Signature line followed by the opening brace
    string parameterList = string.Join(", ", function.Parameters.Select(p => $"{p.Type} {p.Name}"));
    output.AppendLine($"{function.ReturnType} {function.Name}({parameterList})");
    output.AppendLine("{");

    // Declarations of stack-based locals
    foreach (var local in function.LocalVariables)
    {
        output.AppendLine($" {local.Type} {local.Name}; // Stack offset: {local.StackOffset}");
    }

    // Declarations of register-based variables
    foreach (var registerVariable in function.RegisterVariables)
    {
        output.AppendLine($" {registerVariable.Type} {registerVariable.Name}; // Register: {RegisterMapper.GetRegisterName(registerVariable.Register!.Value, 32)}");
    }

    // Separate the declarations from the body only when something was declared
    bool hasDeclarations = function.LocalVariables.Count > 0 || function.RegisterVariables.Count > 0;
    if (hasDeclarations)
    {
        output.AppendLine();
    }

    // Body, driven by the control flow analysis results
    GenerateFunctionBody(function, output, 1);

    // Placeholder return statement and the closing brace
    output.AppendLine();
    output.AppendLine(" return 0; // Placeholder return value");
    output.AppendLine("}");

    return output.ToString();
}
/// <summary>
/// Generates the body of the function, starting from its entry block.
/// When no entry block is found, the first block is used instead and a warning comment is
/// emitted; when the function has no blocks at all, only an explanatory comment is emitted.
/// </summary>
/// <param name="function">The function to generate code for</param>
/// <param name="result">The string builder to append to</param>
/// <param name="indentLevel">The current indentation level (four spaces per level)</param>
private void GenerateFunctionBody(Function function, StringBuilder result, int indentLevel)
{
    var startBlock = function.AsmFunction.EntryBlock;

    if (startBlock == null)
    {
        // Nothing was disassembled: there is no body to generate
        if (function.AsmFunction.Blocks.Count == 0)
        {
            result.AppendLine($"{new string(' ', indentLevel * 4)}// Function body could not be decompiled - no blocks found");
            return;
        }

        // Fall back to the first block and leave a warning in the output
        startBlock = function.AsmFunction.Blocks[0];
        result.AppendLine($"{new string(' ', indentLevel * 4)}// Warning: Entry block not found at address 0x{function.Address:X8}, using first block at 0x{startBlock.Address:X8}");
    }

    // Walk the blocks from the start block, tracking visited block addresses
    GenerateBlockCode(function, startBlock, result, indentLevel, new HashSet<ulong>());
}
/// <summary>
/// Generates code for a basic block, dispatching to the if-else, switch, loop or
/// regular-block generator depending on what the analysis recorded for the block.
/// A block whose address is already in <paramref name="processedBlocks"/> is ignored.
/// </summary>
/// <param name="function">The function containing the block</param>
/// <param name="block">The block to generate code for</param>
/// <param name="result">The string builder to append to</param>
/// <param name="indentLevel">The current indentation level</param>
/// <param name="processedBlocks">Addresses of the blocks that have already been processed</param>
private void GenerateBlockCode(Function function, InstructionBlock block, StringBuilder result, int indentLevel, HashSet<ulong> processedBlocks)
{
    // Add returns false when the address is already present, i.e. the block was handled before
    if (!processedBlocks.Add(block.Address))
    {
        return;
    }

    var context = function.AsmFunction.Context;

    // Condition block of an if-else structure
    if (context.GetAnalysisData<ControlFlowAnalyzer.IfElseStructure>(block.Address, "IfElseStructure") is { } ifElse
        && ifElse.ConditionBlock.Address == block.Address)
    {
        GenerateIfElseCode(function, ifElse, result, indentLevel, processedBlocks);
        return;
    }

    // Header block of a switch structure
    if (context.GetAnalysisData<ControlFlowAnalyzer.SwitchStructure>(block.Address, "SwitchStructure") is { } switchData
        && switchData.HeaderBlock.Address == block.Address)
    {
        GenerateSwitchCode(function, switchData, result, indentLevel, processedBlocks);
        return;
    }

    // Header block of the first loop registered for this block
    // (presumably the innermost one - confirm against the loop analyzer)
    if (context.LoopsByBlockAddress.TryGetValue(block.Address, out var enclosingLoops)
        && enclosingLoops != null
        && enclosingLoops.Count > 0
        && enclosingLoops[0].Header.Address == block.Address)
    {
        GenerateLoopCode(function, enclosingLoops[0], result, indentLevel, processedBlocks);
        return;
    }

    // Not the head of any recognized structure: emit it as a plain block
    GenerateRegularBlockCode(function, block, result, indentLevel, processedBlocks);
}
/// <summary>
/// Emits an address comment and the pseudocode of a plain basic block, then continues
/// with each successor that has not been processed yet
/// </summary>
/// <param name="function">The function containing the block</param>
/// <param name="block">The block to generate code for</param>
/// <param name="result">The string builder to append to</param>
/// <param name="indentLevel">The current indentation level</param>
/// <param name="processedBlocks">Addresses of blocks that have already been processed</param>
private void GenerateRegularBlockCode(Function function, InstructionBlock block, StringBuilder result, int indentLevel, HashSet<ulong> processedBlocks)
{
    var padding = new string(' ', indentLevel * 4);

    // Header comment identifying the block
    result.AppendLine($"{padding}// Block at 0x{block.Address:X8}");

    foreach (var instruction in block.Instructions)
    {
        // Frame setup/teardown instructions are not shown in the output
        if (IsPrologueOrEpilogueInstruction(instruction))
        {
            continue;
        }

        var statement = GenerateInstructionPseudocode(function, instruction);
        if (string.IsNullOrEmpty(statement))
        {
            continue;
        }

        result.AppendLine($"{padding}{statement};");
    }

    // Continue with the successors at the same indentation level
    foreach (var next in block.Successors)
    {
        if (processedBlocks.Contains(next.Address))
        {
            continue;
        }

        GenerateBlockCode(function, next, result, indentLevel, processedBlocks);
    }
}
/// <summary>
/// Emits an <c>if</c> statement for an if-else structure: the then branch is always
/// written, the else branch only when it exists and has not been processed yet
/// </summary>
/// <param name="function">The function containing the structure</param>
/// <param name="ifElseStructure">The if-else structure to generate code for</param>
/// <param name="result">The string builder to append to</param>
/// <param name="indentLevel">The current indentation level</param>
/// <param name="processedBlocks">Addresses of blocks that have already been processed</param>
private void GenerateIfElseCode(Function function, ControlFlowAnalyzer.IfElseStructure ifElseStructure, StringBuilder result, int indentLevel, HashSet<ulong> processedBlocks)
{
    var conditionBlock = ifElseStructure.ConditionBlock;

    // The condition block is consumed by this structure
    processedBlocks.Add(conditionBlock.Address);

    string condition = GenerateConditionExpression(function, conditionBlock);
    string indent = new string(' ', indentLevel * 4);
    int branchLevel = indentLevel + 1;

    // if (...) {
    result.AppendLine($"{indent}// If-else structure at 0x{conditionBlock.Address:X8}");
    result.AppendLine($"{indent}if ({condition})");
    result.AppendLine($"{indent}{{");
    result.AppendLine($"{indent} // Then branch at 0x{ifElseStructure.ThenBlock.Address:X8}");

    GenerateBlockCode(function, ifElseStructure.ThenBlock, result, branchLevel, processedBlocks);

    // }
    result.AppendLine($"{indent}}}");

    // The else branch is skipped when absent or when the then branch already reached it
    var elseBlock = ifElseStructure.ElseBlock;
    if (elseBlock == null || processedBlocks.Contains(elseBlock.Address))
    {
        return;
    }

    // else {
    result.AppendLine($"{indent}else");
    result.AppendLine($"{indent}{{");
    result.AppendLine($"{indent} // Else branch at 0x{elseBlock.Address:X8}");

    GenerateBlockCode(function, elseBlock, result, branchLevel, processedBlocks);

    // }
    result.AppendLine($"{indent}}}");
}
/// <summary>
/// Emits a <c>switch</c> statement with one <c>case</c> per entry of the structure,
/// followed by an empty <c>default</c> case
/// </summary>
/// <param name="function">The function containing the structure</param>
/// <param name="switchStructure">The switch structure to generate code for</param>
/// <param name="result">The string builder to append to</param>
/// <param name="indentLevel">The current indentation level</param>
/// <param name="processedBlocks">Addresses of blocks that have already been processed</param>
private void GenerateSwitchCode(Function function, ControlFlowAnalyzer.SwitchStructure switchStructure, StringBuilder result, int indentLevel, HashSet<ulong> processedBlocks)
{
    // The switched-on value is not recovered yet, so a placeholder is emitted
    const string switchExpr = "/* switch expression */";

    var headerAddress = switchStructure.HeaderBlock.Address;

    // The header block is consumed by this structure
    processedBlocks.Add(headerAddress);

    string indent = new string(' ', indentLevel * 4);

    // switch (...) {
    result.AppendLine($"{indent}// Switch structure at 0x{headerAddress:X8}");
    result.AppendLine($"{indent}switch ({switchExpr})");
    result.AppendLine($"{indent}{{");
    result.AppendLine();

    foreach (var switchCase in switchStructure.Cases)
    {
        // case N:
        result.AppendLine($"{indent} case {switchCase.Value}:");
        result.AppendLine($"{indent} // Case block at 0x{switchCase.CaseBlock.Address:X8}");

        // Case bodies sit two levels below the switch statement
        GenerateBlockCode(function, switchCase.CaseBlock, result, indentLevel + 2, processedBlocks);

        // Every case is terminated with a break
        result.AppendLine($"{indent} break;");
        result.AppendLine();
    }

    // default:
    result.AppendLine($"{indent} default:");
    result.AppendLine($"{indent} // Default case");
    result.AppendLine($"{indent} break;");

    // }
    result.AppendLine($"{indent}}}");
}
/// <summary>
/// Emits a <c>while (true)</c> loop whose body is the code generated from the loop header
/// and the blocks reachable from it
/// </summary>
/// <param name="function">The function containing the structure</param>
/// <param name="loop">The loop to generate code for</param>
/// <param name="result">The string builder to append to</param>
/// <param name="indentLevel">The current indentation level</param>
/// <param name="processedBlocks">Addresses of blocks that have already been processed</param>
private void GenerateLoopCode(Function function, AnalyzerContext.Loop loop, StringBuilder result, int indentLevel, HashSet<ulong> processedBlocks)
{
    // Mark the header as processed so that back edges to it do not re-enter the loop
    processedBlocks.Add(loop.Header.Address);

    string indent = new string(' ', indentLevel * 4);

    // The real loop condition is not recovered yet, so an unconditional loop is emitted
    result.AppendLine($"{indent}// Loop at 0x{loop.Header.Address:X8}")
        .AppendLine($"{indent}while (true) // Simplified loop condition");

    result.AppendLine($"{indent}{{")
        .AppendLine($"{indent} // Loop body");

    // The header is already in processedBlocks at this point, so routing it through
    // GenerateBlockCode would return immediately and leave the body empty. Emit it as a
    // regular block instead; its unprocessed successors are followed from there.
    GenerateRegularBlockCode(function, loop.Header, result, indentLevel + 1, processedBlocks);

    result.AppendLine($"{indent}}}");
}
/// <summary>
/// Produces the condition text for an if statement.
/// Currently a fixed placeholder: neither argument is inspected.
/// </summary>
/// <param name="function">The function containing the block</param>
/// <param name="conditionBlock">The block containing the condition</param>
/// <returns>The placeholder text <c>/* condition */</c></returns>
private string GenerateConditionExpression(Function function, InstructionBlock conditionBlock) =>
    // Recovering the actual condition from the block's instructions is not implemented yet
    "/* condition */";
/// <summary>
/// Produces the pseudocode text for one instruction.
/// Currently the instruction's string form wrapped in a block comment.
/// </summary>
/// <param name="function">The function containing the instruction</param>
/// <param name="instruction">The instruction to generate pseudocode for</param>
/// <returns>The instruction text enclosed in <c>/* ... */</c></returns>
private string GenerateInstructionPseudocode(Function function, Instruction instruction) =>
    // No translation to higher-level statements yet; the instruction is passed through as a comment
    $"/* {instruction} */";
/// <summary>
/// Tells whether an instruction matches one of the frame setup/teardown patterns:
/// <c>push ebp</c>, <c>mov ebp, esp</c>, <c>sub esp, X</c>, <c>pop ebp</c> or <c>ret</c>
/// </summary>
/// <param name="instruction">The instruction to check</param>
/// <returns>True if the instruction matches one of the patterns, false otherwise</returns>
private bool IsPrologueOrEpilogueInstruction(Instruction instruction)
{
    var type = instruction.Type;
    var operands = instruction.StructuredOperands;

    // True when the operand at the given position exists and is the given register
    bool OperandIsRegister(int index, RegisterIndex register)
    {
        return operands.Count > index &&
               operands[index] is RegisterOperand candidate &&
               candidate.Register == register;
    }

    // ret
    if (type == InstructionType.Ret)
    {
        return true;
    }

    // push ebp / pop ebp
    if (type == InstructionType.Push || type == InstructionType.Pop)
    {
        return OperandIsRegister(0, RegisterIndex.Bp);
    }

    // mov ebp, esp
    if (type == InstructionType.Mov)
    {
        return operands.Count > 1 &&
               OperandIsRegister(0, RegisterIndex.Bp) &&
               OperandIsRegister(1, RegisterIndex.Sp);
    }

    // sub esp, X (the second operand only has to exist, its kind is not checked)
    if (type == InstructionType.Sub)
    {
        return operands.Count > 1 && OperandIsRegister(0, RegisterIndex.Sp);
    }

    return false;
}
}

View File

@ -0,0 +1,252 @@
using X86Disassembler.Analysers.DecompilerTypes;
using X86Disassembler.X86;
using X86Disassembler.X86.Operands;
namespace X86Disassembler.Analysers;
/// <summary>
/// Analyzes disassembled code to identify and track variables: EBP-relative stack slots
/// (parameters and locals) and registers that hold constant values
/// </summary>
public class VariableAnalyzer
{
    /// <summary>
    /// EBP-relative offset of the first stack parameter
    /// ([ebp+0] holds the saved EBP and [ebp+4] the return address)
    /// </summary>
    private const int FirstParameterOffset = 8;

    /// <summary>
    /// Size in bytes assumed for every parameter, local and register variable (32-bit)
    /// </summary>
    private const int SlotSize = 4;

    /// <summary>
    /// Exclusive bound on the absolute EBP displacement that is treated as a variable access
    /// </summary>
    private const int MaxStackOffset = 1000;

    /// <summary>
    /// Key under which per-block register values are looked up in the analyzer context
    /// </summary>
    private const string RegisterValuesKey = "RegisterValues";

    /// <summary>
    /// The analyzer context
    /// </summary>
    private readonly AnalyzerContext _context;

    /// <summary>
    /// Creates a new variable analyzer
    /// </summary>
    /// <param name="context">The analyzer context</param>
    public VariableAnalyzer(AnalyzerContext context)
    {
        _context = context;
    }

    /// <summary>
    /// Analyzes the function to identify stack variables (when it has a standard
    /// <c>push ebp; mov ebp, esp</c> frame) and register variables
    /// </summary>
    /// <param name="function">The function to analyze</param>
    public void AnalyzeStackVariables(Function function)
    {
        // EBP-relative accesses can only be interpreted when EBP is used as a frame pointer
        if (HasStandardPrologue(function))
        {
            AnalyzeFramePointerAccesses(function);
        }

        // Register variables do not depend on the frame pointer, so they are analyzed
        // even when no standard prologue was found
        AnalyzeRegisterVariables(function);
    }

    /// <summary>
    /// Checks whether the function contains both <c>push ebp</c> and <c>mov ebp, esp</c>
    /// </summary>
    /// <param name="function">The function to inspect</param>
    /// <returns>True if both instructions were found in the function's blocks</returns>
    private static bool HasStandardPrologue(Function function)
    {
        bool hasPushEbp = false;
        bool hasMovEbpEsp = false;

        foreach (var block in function.AsmFunction.Blocks)
        {
            foreach (var instruction in block.Instructions)
            {
                var operands = instruction.StructuredOperands;

                if (instruction.Type == InstructionType.Push &&
                    operands.Count > 0 &&
                    operands[0] is RegisterOperand pushedReg &&
                    pushedReg.Register == RegisterIndex.Bp)
                {
                    // push ebp
                    hasPushEbp = true;
                }
                else if (instruction.Type == InstructionType.Mov &&
                         operands.Count > 1 &&
                         operands[0] is RegisterOperand destReg &&
                         operands[1] is RegisterOperand srcReg &&
                         destReg.Register == RegisterIndex.Bp &&
                         srcReg.Register == RegisterIndex.Sp)
                {
                    // mov ebp, esp
                    hasMovEbpEsp = true;
                }

                // Both parts found, no need to look further
                if (hasPushEbp && hasMovEbpEsp)
                {
                    return true;
                }
            }
        }

        return false;
    }

    /// <summary>
    /// Scans every memory operand based on EBP and records parameters (offsets from +8)
    /// and local variables (negative offsets) on the function
    /// </summary>
    /// <param name="function">The function to analyze</param>
    private void AnalyzeFramePointerAccesses(Function function)
    {
        // Local variables created so far, keyed by their EBP-relative offset
        var localsByOffset = new Dictionary<int, Variable>();

        foreach (var block in function.AsmFunction.Blocks)
        {
            foreach (var instruction in block.Instructions)
            {
                foreach (var operand in instruction.StructuredOperands)
                {
                    // Only [ebp+X] / [ebp-X] operands are of interest
                    if (operand is not DisplacementMemoryOperand memOp ||
                        memOp.BaseRegister != RegisterIndex.Bp)
                    {
                        continue;
                    }

                    int offset = (int)memOp.Displacement;

                    if (offset >= FirstParameterOffset && offset < MaxStackOffset)
                    {
                        // Offsets below +8 address the saved EBP or the return address and
                        // must not create parameters. Integer division truncates toward
                        // zero, so without the lower bound offsets 5..7 would map to index 0.
                        EnsureParameterExists(function, (offset - FirstParameterOffset) / SlotSize);
                    }
                    else if (offset < 0 && offset > -MaxStackOffset)
                    {
                        // Negative offset = local variable
                        if (!localsByOffset.TryGetValue(offset, out var variable))
                        {
                            variable = new Variable($"local_{Math.Abs(offset)}", DataType.Unknown)
                            {
                                Storage = Variable.StorageType.Stack,
                                StackOffset = offset,
                                Size = SlotSize
                            };

                            localsByOffset[offset] = variable;
                            function.LocalVariables.Add(variable);
                        }

                        // Use this access to refine the variable's type
                        TrackVariableUsage(variable, instruction);
                    }
                }
            }
        }
    }

    /// <summary>
    /// Appends parameters to the function until the given zero-based index exists
    /// </summary>
    /// <param name="function">The function whose parameter list is extended</param>
    /// <param name="paramIndex">Zero-based index of the parameter that was accessed</param>
    private static void EnsureParameterExists(Function function, int paramIndex)
    {
        while (function.Parameters.Count <= paramIndex)
        {
            int index = function.Parameters.Count;
            var param = new Variable($"param_{index + 1}", DataType.Unknown)
            {
                Storage = Variable.StorageType.Parameter,
                StackOffset = FirstParameterOffset + (index * SlotSize),
                IsParameter = true,
                ParameterIndex = index,
                Size = SlotSize
            };

            function.Parameters.Add(param);
        }
    }

    /// <summary>
    /// Analyzes register usage to identify variables stored in registers.
    /// A register (other than ESP/EBP) becomes a variable when the register values
    /// recorded for any block mark it as holding a constant.
    /// </summary>
    /// <param name="function">The function to analyze</param>
    private void AnalyzeRegisterVariables(Function function)
    {
        // One variable per register, shared across all blocks
        var registerVariables = new Dictionary<RegisterIndex, Variable>();

        foreach (var block in function.AsmFunction.Blocks)
        {
            // Blocks without recorded register values contribute nothing
            if (_context.GetAnalysisData<Dictionary<RegisterIndex, DataFlowAnalyzer.ValueInfo>>(block.Address, RegisterValuesKey)
                is not Dictionary<RegisterIndex, DataFlowAnalyzer.ValueInfo> registerValues)
            {
                continue;
            }

            foreach (var (register, valueInfo) in registerValues)
            {
                // ESP and EBP manage the stack frame and are never treated as variables
                if (register == RegisterIndex.Sp || register == RegisterIndex.Bp)
                {
                    continue;
                }

                // Only registers holding a constant value are considered
                if (valueInfo.Type != DataFlowAnalyzer.ValueInfo.ValueType.Constant)
                {
                    continue;
                }

                if (registerVariables.ContainsKey(register))
                {
                    continue;
                }

                var variable = new Variable($"reg_{RegisterMapper.GetRegisterName(register, 32)}", DataType.Unknown)
                {
                    Storage = Variable.StorageType.Register,
                    Register = register,
                    Size = SlotSize
                };

                registerVariables[register] = variable;
                function.RegisterVariables.Add(variable);
            }
        }
    }

    /// <summary>
    /// Infers the type of a variable from an instruction that uses it.
    /// Only variables whose type is still unknown are updated.
    /// </summary>
    /// <param name="variable">The variable to track</param>
    /// <param name="instruction">The instruction using the variable</param>
    private void TrackVariableUsage(Variable variable, Instruction instruction)
    {
        // An already inferred type is never overwritten
        if (variable.Type != DataType.Unknown)
        {
            return;
        }

        var type = instruction.Type;

        if (type == InstructionType.Cmp || type == InstructionType.Test)
        {
            // A comparison against 0 might be a boolean check; there is no bool type yet,
            // so the variable is typed as int
            if (instruction.StructuredOperands.Count > 1 &&
                instruction.StructuredOperands[1] is ImmediateOperand immOp &&
                immOp.Value == 0)
            {
                variable.Type = DataType.Int;
            }
        }
        else if (type == InstructionType.Fld ||
                 type == InstructionType.Fst ||
                 type == InstructionType.Fstp)
        {
            // Used by a floating-point load/store
            variable.Type = DataType.Float;
        }

        // String detection is not implemented: there are no string instruction types yet
    }
}