0
mirror of https://github.com/sampletext32/ParkanPlayground.git synced 2025-07-01 04:40:25 +03:00

changes all over the place

This commit is contained in:
bird_egop
2025-04-19 02:12:46 +03:00
parent de2e4312fb
commit 1c7054781c
16 changed files with 134 additions and 2909 deletions

View File

@ -1,125 +0,0 @@
namespace X86Disassembler.Analysers;
/// <summary>
/// Shared container for everything the analyzers learn about one disassembled function:
/// a block index, loop tables, and a free-form per-address key/value store
/// </summary>
public class AnalyzerContext
{
    /// <summary>
    /// The function this context belongs to
    /// </summary>
    public AsmFunction Function { get; }

    /// <summary>
    /// Instruction blocks indexed by their address (filled in by the constructor)
    /// </summary>
    public Dictionary<ulong, InstructionBlock> BlocksByAddress { get; } = new();

    /// <summary>
    /// Loops indexed by the address of their header block
    /// </summary>
    public Dictionary<ulong, Loop> LoopsByHeaderAddress { get; } = new();

    /// <summary>
    /// For each block address, the loops that contain that block
    /// </summary>
    public Dictionary<ulong, List<Loop>> LoopsByBlockAddress { get; } = new();

    /// <summary>
    /// Arbitrary analysis results: address -> (key -> value)
    /// </summary>
    public Dictionary<ulong, Dictionary<string, object>> AnalysisDataByAddress { get; } = new();

    /// <summary>
    /// Creates a context for <paramref name="function"/> and indexes its current blocks by address
    /// </summary>
    /// <param name="function">The function to analyze</param>
    public AnalyzerContext(AsmFunction function)
    {
        Function = function;

        // Only the blocks present right now are indexed; when two blocks share an address
        // the later one replaces the earlier one
        foreach (var instructionBlock in function.Blocks)
        {
            BlocksByAddress[instructionBlock.Address] = instructionBlock;
        }
    }

    /// <summary>
    /// Records <paramref name="value"/> under <paramref name="key"/> for the given address,
    /// replacing any value previously stored under the same address and key
    /// </summary>
    /// <param name="address">The address the data belongs to</param>
    /// <param name="key">The name of the data item</param>
    /// <param name="value">The data to store</param>
    public void StoreAnalysisData(ulong address, string key, object value)
    {
        if (AnalysisDataByAddress.TryGetValue(address, out var entriesForAddress))
        {
            entriesForAddress[key] = value;
            return;
        }

        // First item for this address: create its per-address table on demand
        AnalysisDataByAddress[address] = new Dictionary<string, object> { [key] = value };
    }

    /// <summary>
    /// Looks up the data stored under <paramref name="key"/> for the given address
    /// </summary>
    /// <param name="address">The address to look up</param>
    /// <param name="key">The name of the data item</param>
    /// <returns>The stored object, or null when the address or the key is unknown</returns>
    public object? GetAnalysisData(ulong address, string key)
    {
        if (!AnalysisDataByAddress.TryGetValue(address, out var entriesForAddress))
        {
            return null;
        }

        return entriesForAddress.TryGetValue(key, out var stored) ? stored : null;
    }

    /// <summary>
    /// Typed variant of <see cref="GetAnalysisData(ulong, string)"/>
    /// </summary>
    /// <typeparam name="T">The expected type of the stored data</typeparam>
    /// <param name="address">The address to look up</param>
    /// <param name="key">The name of the data item</param>
    /// <returns>The stored data, or default(T) when nothing is stored or it is not a <typeparamref name="T"/></returns>
    public T? GetAnalysisData<T>(ulong address, string key)
    {
        return GetAnalysisData(address, key) is T match ? match : default;
    }

    /// <summary>
    /// Describes one loop of the control flow graph
    /// </summary>
    public class Loop
    {
        /// <summary>
        /// The loop header (the block through which the loop is entered)
        /// </summary>
        public InstructionBlock Header { get; set; } = null!;

        /// <summary>
        /// All blocks belonging to the loop
        /// </summary>
        public List<InstructionBlock> Blocks { get; set; } = new();

        /// <summary>
        /// The edge that closes the loop, running from a block back to the header
        /// </summary>
        public (InstructionBlock From, InstructionBlock To) BackEdge { get; set; }

        /// <summary>
        /// Blocks of the loop that have at least one successor outside the loop
        /// </summary>
        public List<InstructionBlock> ExitBlocks { get; set; } = new();
    }
}

View File

@ -15,73 +15,8 @@ public class AsmFunction
/// </summary>
public List<InstructionBlock> Blocks { get; set; } = [];
/// <summary>
/// The first block whose address equals the function's address, or null when there is none
/// </summary>
public InstructionBlock? EntryBlock
{
    get
    {
        return Blocks.FirstOrDefault(candidate => candidate.Address == Address);
    }
}
/// <summary>
/// The non-empty blocks whose last instruction is a return.
/// A new list is built on every access.
/// </summary>
public List<InstructionBlock> ExitBlocks
{
    get
    {
        var returningBlocks =
            from candidate in Blocks
            where candidate.Instructions.Count > 0
            where candidate.Instructions[^1].Type.IsRet()
            select candidate;

        return returningBlocks.ToList();
    }
}
/// <summary>
/// The analyzer context for this function
/// </summary>
/// <remarks>
/// Assigned by the parameterless constructor. The setter is private, so code outside this
/// class cannot replace the instance.
/// </remarks>
public AnalyzerContext Context { get; private set; }
/// <summary>
/// Creates a new AsmFunction and attaches a fresh analyzer context built from this instance
/// </summary>
public AsmFunction() => Context = new AnalyzerContext(this);
/// <summary>
/// Runs the loop analyzer and then the data flow analyzer over this function's context
/// </summary>
public void Analyze()
{
    // Loop detection runs first, data flow second
    new LoopAnalyzer().AnalyzeLoops(Context);
    new DataFlowAnalyzer().AnalyzeDataFlow(Context);
}
/// <summary>
/// Returns a multi-line description of the function: its address, entry block, exit blocks,
/// block count, the loops recorded in the analyzer context, and every block on its own
/// tab-prefixed line
/// </summary>
/// <returns>The formatted description, with lines separated by '\n'</returns>
public override string ToString()
{
    // EntryBlock and ExitBlocks are computed properties that re-scan Blocks on each access,
    // so evaluate each of them exactly once
    var entryBlock = EntryBlock;
    var exitBlocks = ExitBlocks;
    var loops = Context.LoopsByHeaderAddress.Values;

    // A missing entry block is reported as a bare "None" instead of the misleading "0xNone"
    string entryInfo = entryBlock is null ? "None" : $"0x{entryBlock.Address:X8}";

    string exitInfo = exitBlocks.Count > 0
        ? string.Join(", ", exitBlocks.Select(b => $"0x{b.Address:X8}"))
        : "None";

    // One line per loop, numbered in enumeration order starting at 0
    string loopsInfo = loops.Count > 0
        ? $"Loops: {loops.Count}\n" + string.Concat(loops.Select((loop, index) =>
            $" Loop {index}: Header=0x{loop.Header.Address:X8}, " +
            $"Blocks={loop.Blocks.Count}, " +
            $"Back Edge=(0x{loop.BackEdge.From.Address:X8} -> 0x{loop.BackEdge.To.Address:X8}), " +
            $"Exits={loop.ExitBlocks.Count}\n"))
        : "Loops: None\n";

    string blocksInfo = string.Join("\n", Blocks.Select(x => $"\t{x}"));

    return $"Function at 0x{Address:X8}\n" +
           $"Entry Block: {entryInfo}\n" +
           $"Exit Blocks: {exitInfo}\n" +
           $"Total Blocks: {Blocks.Count}\n" +
           loopsInfo +
           blocksInfo;
}
}

View File

@ -1,303 +0,0 @@
using X86Disassembler.Analysers.DecompilerTypes;
using X86Disassembler.X86;
using X86Disassembler.X86.Operands;
namespace X86Disassembler.Analysers;
/// <summary>
/// Analyzes control flow structures in disassembled code
/// </summary>
/// <remarks>
/// If-else structures are stored in, and read back from, the context owned by the analyzed
/// function (<c>function.AsmFunction.Context</c>). Switch structures are stored in the context
/// passed to the constructor.
/// </remarks>
public class ControlFlowAnalyzer
{
    /// <summary>
    /// Analysis-data key under which an <see cref="IfElseStructure"/> is stored
    /// </summary>
    private const string IfElseStructureKey = "IfElseStructure";

    /// <summary>
    /// Analysis-data key under which a <see cref="SwitchStructure"/> is stored
    /// </summary>
    private const string SwitchStructureKey = "SwitchStructure";

    /// <summary>
    /// Amount added to a relative jump's target address to obtain the address used by blocks.
    /// NOTE(review): hard-coded section offset; the original comment says it mirrors the
    /// conversion done in BlockDisassembler.cs - confirm it holds for every section.
    /// </summary>
    private const int FileOffsetToRvaDelta = 0x1000;

    /// <summary>
    /// The analyzer context
    /// </summary>
    private readonly AnalyzerContext _context;

    /// <summary>
    /// Creates a new control flow analyzer
    /// </summary>
    /// <param name="context">The analyzer context</param>
    public ControlFlowAnalyzer(AnalyzerContext context)
    {
        _context = context;
    }

    /// <summary>
    /// Analyzes the control flow of a function to identify high-level structures:
    /// first if-else structures, then switch statements
    /// </summary>
    /// <param name="function">The function to analyze</param>
    public void AnalyzeControlFlow(Function function)
    {
        IdentifyIfElseStructures(function);
        IdentifySwitchStatements(function);
    }

    /// <summary>
    /// Identifies if-else structures: every block ending in a conditional jump whose target and
    /// fall-through blocks can both be resolved gets an <see cref="IfElseStructure"/>; a second
    /// pass links structures whose branch block itself starts another structure
    /// </summary>
    /// <param name="function">The function to analyze</param>
    private void IdentifyIfElseStructures(Function function)
    {
        var blocks = function.AsmFunction.Blocks;

        // Both passes must use the same store, otherwise the second pass cannot see what the
        // first pass recorded when _context is a different instance than the function's context
        var structureStore = function.AsmFunction.Context;

        // Index the blocks once instead of scanning the whole list for every conditional jump.
        // TryAdd keeps the first block seen for an address, like a first-match linear search.
        var blocksByAddress = new Dictionary<ulong, InstructionBlock>();
        foreach (var block in blocks)
        {
            blocksByAddress.TryAdd(block.Address, block);
        }

        // First pass: one structure per resolvable conditional jump
        foreach (var block in blocks)
        {
            var lastInstruction = block.Instructions.LastOrDefault();
            if (lastInstruction == null || !lastInstruction.Type.IsConditionalJump())
            {
                continue;
            }

            if (!TryGetJumpTargetAddress(lastInstruction, out ulong targetAddress) ||
                !blocksByAddress.TryGetValue(targetAddress, out var targetBlock))
            {
                continue;
            }

            var fallThroughBlock = FindFallThroughBlock(block, targetBlock);
            if (fallThroughBlock == null)
            {
                continue;
            }

            var ifElseStructure = new IfElseStructure
            {
                ConditionBlock = block,
                ThenBlock = targetBlock,
                ElseBlock = fallThroughBlock
            };

            structureStore.StoreAnalysisData(block.Address, IfElseStructureKey, ifElseStructure);
        }

        // Second pass: link nested structures
        foreach (var block in blocks)
        {
            var ifElseStructure = structureStore.GetAnalysisData<IfElseStructure>(block.Address, IfElseStructureKey);
            if (ifElseStructure == null)
            {
                continue;
            }

            // A branch that jumps back to its own condition block must not be nested in itself.
            // NOTE(review): longer cycles (A -> B -> A) can still occur for loops; consumers
            // that walk the nested links recursively should guard against revisiting.
            var nestedThenIf = structureStore.GetAnalysisData<IfElseStructure>(ifElseStructure.ThenBlock.Address, IfElseStructureKey);
            if (nestedThenIf != null && !ReferenceEquals(nestedThenIf, ifElseStructure))
            {
                ifElseStructure.NestedThenStructure = nestedThenIf;
            }

            if (ifElseStructure.ElseBlock != null)
            {
                var nestedElseIf = structureStore.GetAnalysisData<IfElseStructure>(ifElseStructure.ElseBlock.Address, IfElseStructureKey);
                if (nestedElseIf != null && !ReferenceEquals(nestedElseIf, ifElseStructure))
                {
                    ifElseStructure.NestedElseStructure = nestedElseIf;
                }
            }
        }
    }

    /// <summary>
    /// Returns the first successor of <paramref name="block"/> that is not the jump target
    /// </summary>
    /// <param name="block">The block ending in the conditional jump</param>
    /// <param name="targetBlock">The block the jump goes to when taken</param>
    /// <returns>The fall-through block, or null when every successor is the target</returns>
    private static InstructionBlock? FindFallThroughBlock(InstructionBlock block, InstructionBlock targetBlock)
    {
        foreach (var successor in block.Successors)
        {
            if (successor != targetBlock)
            {
                return successor;
            }
        }

        return null;
    }

    /// <summary>
    /// Identifies switch statements in the control flow graph
    /// </summary>
    /// <remarks>
    /// Every successor of a potential switch header is recorded as a case with value 0,
    /// because the real case values are not determined here.
    /// </remarks>
    /// <param name="function">The function to analyze</param>
    private void IdentifySwitchStatements(Function function)
    {
        foreach (var block in function.AsmFunction.Blocks)
        {
            if (!IsPotentialSwitchHeader(block))
            {
                continue;
            }

            var switchStructure = new SwitchStructure
            {
                HeaderBlock = block,
                Cases = []
            };

            foreach (var successor in block.Successors)
            {
                switchStructure.Cases.Add(new SwitchCase
                {
                    CaseBlock = successor,
                    Value = 0 // We'd need more analysis to determine the actual value
                });
            }

            _context.StoreAnalysisData(block.Address, SwitchStructureKey, switchStructure);
        }
    }

    /// <summary>
    /// Resolves the target address of a jump instruction from its first operand
    /// </summary>
    /// <param name="instruction">The jump instruction</param>
    /// <param name="targetAddress">The resolved target address; 0 when the method returns false</param>
    /// <returns>
    /// True when the first operand is an immediate or a relative offset; false when there is no
    /// operand or it is of another kind. Reporting failure explicitly avoids treating address 0
    /// as "unknown".
    /// </returns>
    private static bool TryGetJumpTargetAddress(Instruction instruction, out ulong targetAddress)
    {
        targetAddress = 0;

        if (instruction.StructuredOperands.Count == 0)
        {
            return false;
        }

        var operand = instruction.StructuredOperands[0];
        if (operand is ImmediateOperand immOp)
        {
            targetAddress = (ulong)immOp.Value;
            return true;
        }

        if (operand is RelativeOffsetOperand relOp)
        {
            // The operand already carries the resolved target; shift it into the block address space
            targetAddress = relOp.TargetAddress + FileOffsetToRvaDelta;
            return true;
        }

        return false;
    }

    /// <summary>
    /// Checks if the given block is a potential switch statement header: more than two
    /// successors and a final JMP whose first operand is not a direct target
    /// </summary>
    /// <param name="block">The block to check</param>
    /// <returns>True if the block is a potential switch header, false otherwise</returns>
    private static bool IsPotentialSwitchHeader(InstructionBlock block)
    {
        // A plain conditional branch has at most two successors
        if (block.Successors.Count <= 2 || block.Instructions.Count == 0)
        {
            return false;
        }

        var lastInstruction = block.Instructions[^1];

        // Immediate and relative-offset operands both name a direct target (see
        // TryGetJumpTargetAddress); any other operand kind is treated as an indirect jump
        return lastInstruction.Type == InstructionType.Jmp &&
               lastInstruction.StructuredOperands.Count > 0 &&
               lastInstruction.StructuredOperands[0] is not (ImmediateOperand or RelativeOffsetOperand);
    }

    /// <summary>
    /// Represents an if-else structure in the control flow graph
    /// </summary>
    public class IfElseStructure
    {
        /// <summary>
        /// The block containing the condition
        /// </summary>
        public InstructionBlock ConditionBlock { get; set; } = null!;

        /// <summary>
        /// The block representing the 'then' branch (the conditional jump's target)
        /// </summary>
        public InstructionBlock ThenBlock { get; set; } = null!;

        /// <summary>
        /// The block representing the 'else' branch (the fall-through successor)
        /// </summary>
        public InstructionBlock? ElseBlock { get; set; }

        /// <summary>
        /// The block where both branches merge back together (if applicable)
        /// </summary>
        public InstructionBlock? MergeBlock { get; set; }

        /// <summary>
        /// Whether this is a complete if-else structure with a merge point
        /// </summary>
        public bool IsComplete { get; set; }

        /// <summary>
        /// Nested if-else structure in the 'then' branch (if any)
        /// </summary>
        public IfElseStructure? NestedThenStructure { get; set; }

        /// <summary>
        /// Nested if-else structure in the 'else' branch (if any)
        /// </summary>
        public IfElseStructure? NestedElseStructure { get; set; }
    }

    /// <summary>
    /// Represents a switch statement in the control flow graph
    /// </summary>
    public class SwitchStructure
    {
        /// <summary>
        /// The block containing the switch header
        /// </summary>
        public InstructionBlock HeaderBlock { get; set; } = null!;

        /// <summary>
        /// The cases of the switch statement
        /// </summary>
        public List<SwitchCase> Cases { get; set; } = [];
    }

    /// <summary>
    /// Represents a case in a switch statement
    /// </summary>
    public class SwitchCase
    {
        /// <summary>
        /// The value of the case
        /// </summary>
        public int Value { get; set; }

        /// <summary>
        /// The block containing the case code
        /// </summary>
        public InstructionBlock CaseBlock { get; set; } = null!;
    }
}

View File

@ -1,384 +0,0 @@
using X86Disassembler.X86;
using X86Disassembler.X86.Operands;
namespace X86Disassembler.Analysers;
/// <summary>
/// Analyzes data flow through instructions to track register values
/// </summary>
/// <remarks>
/// Tracking is local to each block: the register state starts empty at the top of every block
/// and a snapshot is stored in the analyzer context after each instruction.
/// </remarks>
public class DataFlowAnalyzer
{
    /// <summary>
    /// Analysis-data key under which the per-instruction register snapshot is stored
    /// </summary>
    private const string RegisterValueKey = "RegisterValue";

    /// <summary>
    /// Analysis-data key for memory values (not used by any code in this class)
    /// </summary>
    private const string MemoryValueKey = "MemoryValue";

    /// <summary>
    /// Registers invalidated by a CALL (EAX, ECX and EDX per the original comment)
    /// </summary>
    private static readonly RegisterIndex[] CallClobberedRegisters =
    [
        RegisterIndex.A,
        RegisterIndex.C,
        RegisterIndex.D
    ];

    /// <summary>
    /// Represents a known value for a register or memory location
    /// </summary>
    public class ValueInfo
    {
        /// <summary>
        /// The type of value (constant, register, memory, unknown)
        /// </summary>
        public enum ValueType
        {
            Unknown,
            Constant,
            Register,
            Memory
        }

        /// <summary>
        /// The type of this value
        /// </summary>
        public ValueType Type { get; set; } = ValueType.Unknown;

        /// <summary>
        /// The constant value (if Type is Constant)
        /// </summary>
        public ulong? ConstantValue { get; set; }

        /// <summary>
        /// The source register (if Type is Register)
        /// </summary>
        public RegisterIndex? SourceRegister { get; set; }

        /// <summary>
        /// The memory address or expression (if Type is Memory)
        /// </summary>
        public string? MemoryExpression { get; set; }

        /// <summary>
        /// The instruction that defined this value
        /// </summary>
        public Instruction? DefiningInstruction { get; set; }

        /// <summary>
        /// Returns a string representation of the value
        /// </summary>
        public override string ToString()
        {
            return Type switch
            {
                ValueType.Constant => $"0x{ConstantValue:X8}",
                ValueType.Register => $"{SourceRegister}",
                ValueType.Memory => $"[{MemoryExpression}]",
                _ => "unknown"
            };
        }
    }

    /// <summary>
    /// Analyzes data flow in the function and stores results in the analyzer context
    /// </summary>
    /// <param name="context">The analyzer context to store results in</param>
    public void AnalyzeDataFlow(AnalyzerContext context)
    {
        foreach (var block in context.Function.Blocks)
        {
            // Register knowledge does not carry over between blocks
            Dictionary<RegisterIndex, ValueInfo> registerValues = new();

            foreach (var instruction in block.Instructions)
            {
                ProcessInstruction(instruction, registerValues);

                // Snapshot the state as it is after this instruction
                StoreRegisterState(instruction.Address, registerValues, context);
            }
        }
    }

    /// <summary>
    /// Gets the register values recorded at the given address, keyed by register name
    /// </summary>
    /// <param name="address">The instruction address to look up</param>
    /// <param name="context">The analyzer context holding the snapshots</param>
    /// <returns>
    /// A dictionary keyed by the register's <c>ToString()</c> text, or null when no snapshot
    /// exists for the address
    /// </returns>
    public static Dictionary<string, ValueInfo>? GetRegisterValues(ulong address, AnalyzerContext context)
    {
        var stored = context.GetAnalysisData(address, RegisterValueKey);

        // Data already keyed by name is returned unchanged
        if (stored is Dictionary<string, ValueInfo> valuesByName)
        {
            return valuesByName;
        }

        // StoreRegisterState records snapshots keyed by RegisterIndex; convert them so the
        // lookup actually finds what the analyzer stored
        if (stored is Dictionary<RegisterIndex, ValueInfo> valuesByRegister)
        {
            var converted = new Dictionary<string, ValueInfo>(valuesByRegister.Count);
            foreach (var entry in valuesByRegister)
            {
                converted[$"{entry.Key}"] = entry.Value;
            }

            return converted;
        }

        return null;
    }

    /// <summary>
    /// Updates <paramref name="registerValues"/> for the effect of one instruction
    /// </summary>
    /// <param name="instruction">The instruction to process</param>
    /// <param name="registerValues">The current register values (modified in place)</param>
    private static void ProcessInstruction(Instruction instruction, Dictionary<RegisterIndex, ValueInfo> registerValues)
    {
        switch (instruction.Type)
        {
            case InstructionType.Mov:
                ProcessMovInstruction(instruction, registerValues);
                break;

            case InstructionType.Xor:
                ProcessXorInstruction(instruction, registerValues);
                break;

            case InstructionType.Add:
                ProcessConstantArithmetic(instruction, registerValues, subtract: false);
                break;

            case InstructionType.Sub:
                ProcessConstantArithmetic(instruction, registerValues, subtract: true);
                break;

            case InstructionType.Pop:
                ProcessPopInstruction(instruction, registerValues);
                break;

            case InstructionType.Call:
                ProcessCallInstruction(instruction, registerValues);
                break;

            default:
                // Unsupported instruction: if its first operand is a register, forget what we
                // knew about that register
                if (instruction.StructuredOperands.Count > 0 &&
                    instruction.StructuredOperands[0] is RegisterOperand regOp)
                {
                    MarkUnknown(registerValues, regOp.Register, instruction);
                }

                break;
        }
    }

    /// <summary>
    /// Records that <paramref name="register"/> holds an unknown value defined by <paramref name="instruction"/>
    /// </summary>
    private static void MarkUnknown(Dictionary<RegisterIndex, ValueInfo> registerValues, RegisterIndex register, Instruction instruction)
    {
        registerValues[register] = new ValueInfo
        {
            Type = ValueInfo.ValueType.Unknown,
            DefiningInstruction = instruction
        };
    }

    /// <summary>
    /// Processes a MOV instruction to update register values
    /// </summary>
    private static void ProcessMovInstruction(Instruction instruction, Dictionary<RegisterIndex, ValueInfo> registerValues)
    {
        var operands = instruction.StructuredOperands;

        // MOV [mem], reg and MOV [mem], imm do not change any register value
        if (operands.Count < 2 || operands[0] is not RegisterOperand destReg)
        {
            return;
        }

        var src = operands[1];

        if (src is ImmediateOperand immSrc)
        {
            // MOV reg, imm
            registerValues[destReg.Register] = new ValueInfo
            {
                Type = ValueInfo.ValueType.Constant,
                ConstantValue = immSrc.Value,
                DefiningInstruction = instruction
            };
        }
        else if (src is RegisterOperand srcReg)
        {
            // MOV reg, reg
            if (registerValues.TryGetValue(srcReg.Register, out var srcValue))
            {
                // The source is tracked: the destination now holds the same value
                registerValues[destReg.Register] = new ValueInfo
                {
                    Type = srcValue.Type,
                    ConstantValue = srcValue.ConstantValue,
                    SourceRegister = srcValue.SourceRegister,
                    MemoryExpression = srcValue.MemoryExpression,
                    DefiningInstruction = instruction
                };
            }
            else
            {
                // The source is untracked: remember only which register the value came from
                registerValues[destReg.Register] = new ValueInfo
                {
                    Type = ValueInfo.ValueType.Register,
                    SourceRegister = srcReg.Register,
                    DefiningInstruction = instruction
                };
            }
        }
        else if (src is MemoryOperand memSrc)
        {
            // MOV reg, [mem]
            registerValues[destReg.Register] = new ValueInfo
            {
                Type = ValueInfo.ValueType.Memory,
                MemoryExpression = memSrc.ToString(),
                DefiningInstruction = instruction
            };
        }
        else
        {
            // Any other source kind still overwrites the register, so the previous value
            // must not survive
            MarkUnknown(registerValues, destReg.Register, instruction);
        }
    }

    /// <summary>
    /// Processes an XOR instruction to update register values
    /// </summary>
    private static void ProcessXorInstruction(Instruction instruction, Dictionary<RegisterIndex, ValueInfo> registerValues)
    {
        var operands = instruction.StructuredOperands;
        if (operands.Count < 2 || operands[0] is not RegisterOperand destReg)
        {
            return;
        }

        // XOR reg, same_reg is the zeroing idiom
        if (operands[1] is RegisterOperand srcReg && destReg.Register == srcReg.Register)
        {
            registerValues[destReg.Register] = new ValueInfo
            {
                Type = ValueInfo.ValueType.Constant,
                ConstantValue = 0,
                DefiningInstruction = instruction
            };
            return;
        }

        // Every other XOR makes the result unknown
        MarkUnknown(registerValues, destReg.Register, instruction);
    }

    /// <summary>
    /// Processes ADD or SUB: folds <c>reg op imm</c> when the register holds a known constant,
    /// otherwise marks the destination register unknown
    /// </summary>
    /// <param name="instruction">The ADD or SUB instruction</param>
    /// <param name="registerValues">The current register values (modified in place)</param>
    /// <param name="subtract">True for SUB, false for ADD</param>
    private static void ProcessConstantArithmetic(Instruction instruction, Dictionary<RegisterIndex, ValueInfo> registerValues, bool subtract)
    {
        var operands = instruction.StructuredOperands;
        if (operands.Count < 2 || operands[0] is not RegisterOperand destReg)
        {
            return;
        }

        if (operands[1] is ImmediateOperand immSrc &&
            registerValues.TryGetValue(destReg.Register, out var destValue) &&
            destValue.Type == ValueInfo.ValueType.Constant &&
            destValue.ConstantValue.HasValue)
        {
            ulong current = destValue.ConstantValue.Value;

            // The result is truncated to 32 bits; unchecked makes the wrap-around explicit
            // so it never throws, whatever the project's overflow-checking setting is
            uint folded = unchecked(subtract
                ? (uint)(current - immSrc.Value)
                : (uint)(current + immSrc.Value));

            registerValues[destReg.Register] = new ValueInfo
            {
                Type = ValueInfo.ValueType.Constant,
                ConstantValue = folded,
                DefiningInstruction = instruction
            };
            return;
        }

        // Any other ADD/SUB form makes the result unknown
        MarkUnknown(registerValues, destReg.Register, instruction);
    }

    /// <summary>
    /// Processes a POP instruction to update register values
    /// </summary>
    private static void ProcessPopInstruction(Instruction instruction, Dictionary<RegisterIndex, ValueInfo> registerValues)
    {
        // The popped value comes from the stack, which is not tracked
        if (instruction.StructuredOperands.Count >= 1 &&
            instruction.StructuredOperands[0] is RegisterOperand destReg)
        {
            MarkUnknown(registerValues, destReg.Register, instruction);
        }
    }

    /// <summary>
    /// Processes a CALL instruction to update register values
    /// </summary>
    private static void ProcessCallInstruction(Instruction instruction, Dictionary<RegisterIndex, ValueInfo> registerValues)
    {
        foreach (var register in CallClobberedRegisters)
        {
            MarkUnknown(registerValues, register, instruction);
        }
    }

    /// <summary>
    /// Stores a copy of the current register state at the given address
    /// </summary>
    private static void StoreRegisterState(ulong address, Dictionary<RegisterIndex, ValueInfo> registerValues, AnalyzerContext context)
    {
        // Copy the dictionary so later instructions do not alter this snapshot
        var registerValuesCopy = new Dictionary<RegisterIndex, ValueInfo>(registerValues);
        context.StoreAnalysisData(address, RegisterValueKey, registerValuesCopy);
    }
}

View File

@ -1,149 +0,0 @@
using X86Disassembler.Analysers.DecompilerTypes;
using X86Disassembler.PE;
using X86Disassembler.X86;
namespace X86Disassembler.Analysers;
/// <summary>
/// Entry point of the decompiler: disassembles functions of a PE file, runs the analyzers
/// over them and caches the results by address
/// </summary>
public class DecompilerEngine
{
    /// <summary>
    /// The PE file this engine works on
    /// </summary>
    private readonly PeFile _peFile;

    /// <summary>
    /// Cache of already decompiled functions, keyed by address
    /// </summary>
    private readonly Dictionary<ulong, Function> _functions = new();

    /// <summary>
    /// Export names keyed by the exported function's RVA
    /// </summary>
    private readonly Dictionary<ulong, string> _exportedFunctions = new();

    /// <summary>
    /// Creates an engine for <paramref name="peFile"/> and records the names of its exports
    /// </summary>
    /// <param name="peFile">The PE file to decompile</param>
    public DecompilerEngine(PeFile peFile)
    {
        _peFile = peFile;

        foreach (var exported in peFile.ExportedFunctions)
        {
            _exportedFunctions[exported.AddressRva] = exported.Name;
        }
    }

    /// <summary>
    /// Decompiles the function at <paramref name="address"/>, or returns the cached result
    /// when that address has been decompiled before
    /// </summary>
    /// <param name="address">The address of the function to decompile</param>
    /// <returns>The decompiled function</returns>
    /// <exception cref="InvalidOperationException">No code section contains the address</exception>
    public Function DecompileFunction(ulong address)
    {
        if (_functions.TryGetValue(address, out var cached))
        {
            return cached;
        }

        // Locate the code section whose virtual range covers the address
        var section = _peFile.SectionHeaders.Find(header =>
            header.ContainsCode() &&
            address >= header.VirtualAddress &&
            address < header.VirtualAddress + header.VirtualSize);

        if (section == null)
        {
            throw new InvalidOperationException($"No code section found containing address 0x{address:X8}");
        }

        // Disassemble from the raw bytes of that section
        byte[] sectionBytes = _peFile.GetSectionData(_peFile.SectionHeaders.IndexOf(section));
        var asmFunction = new BlockDisassembler(sectionBytes, section.VirtualAddress)
            .DisassembleFromAddress((uint)address);

        // Low-level analyses share one context
        var context = new AnalyzerContext(asmFunction);
        new LoopAnalyzer().AnalyzeLoops(context);
        new DataFlowAnalyzer().AnalyzeDataFlow(context);

        // Use the export name when there is one, otherwise a name derived from the address
        if (!_exportedFunctions.TryGetValue(address, out var functionName))
        {
            functionName = $"func_{address:X8}";
        }

        var result = new FunctionAnalyzer(context).AnalyzeFunction(address, functionName);
        new ControlFlowAnalyzer(context).AnalyzeControlFlow(result);

        _functions[address] = result;
        return result;
    }

    /// <summary>
    /// Produces pseudocode for a decompiled function using a new <c>PseudocodeGenerator</c>
    /// </summary>
    /// <param name="function">The function to generate pseudocode for</param>
    /// <returns>The generated pseudocode</returns>
    public string GeneratePseudocode(Function function) =>
        new PseudocodeGenerator().GeneratePseudocode(function);

    /// <summary>
    /// Decompiles every non-forwarded export of the PE file. A failure on one export is
    /// written to the console and does not stop the remaining exports.
    /// </summary>
    /// <returns>The engine's cache of decompiled functions, keyed by address</returns>
    public Dictionary<ulong, Function> DecompileAllExportedFunctions()
    {
        foreach (var exported in _peFile.ExportedFunctions)
        {
            // Forwarded exports are skipped
            if (!exported.IsForwarder)
            {
                try
                {
                    DecompileFunction(exported.AddressRva);
                }
                catch (Exception error)
                {
                    Console.WriteLine($"Error decompiling function {exported.Name} at 0x{exported.AddressRva:X8}: {error.Message}");
                }
            }
        }

        return _functions;
    }
}

View File

@ -1,58 +0,0 @@
namespace X86Disassembler.Analysers.DecompilerTypes;
/// <summary>
/// Represents a calling convention used by a function
/// </summary>
public enum CallingConvention
{
/// <summary>
/// C declaration calling convention (caller cleans the stack)
/// Parameters are pushed right-to-left
/// EAX, ECX, EDX are caller-saved
/// EBX, ESI, EDI, EBP are callee-saved
/// Return value in EAX (or EDX:EAX for 64-bit values)
/// </summary>
Cdecl,
/// <summary>
/// Standard calling convention (callee cleans the stack)
/// Parameters are pushed right-to-left
/// EAX, ECX, EDX are caller-saved
/// EBX, ESI, EDI, EBP are callee-saved
/// Return value in EAX (or EDX:EAX for 64-bit values)
/// </summary>
Stdcall,
/// <summary>
/// Fast calling convention
/// First two parameters in ECX and EDX, rest on stack right-to-left
/// EAX, ECX, EDX are caller-saved
/// EBX, ESI, EDI, EBP are callee-saved
/// Return value in EAX
/// Callee cleans the stack
/// </summary>
Fastcall,
/// <summary>
/// This calling convention (C++ member functions)
/// 'this' pointer in ECX, other parameters pushed right-to-left
/// EAX, ECX, EDX are caller-saved
/// EBX, ESI, EDI, EBP are callee-saved
/// Return value in EAX
/// Callee cleans the stack (per the Microsoft x86 __thiscall definition;
/// variadic member functions use cdecl instead)
/// </summary>
Thiscall,
/// <summary>
/// Microsoft vectorcall convention (x86 form)
/// First two integer parameters in ECX and EDX, first six vector/floating point parameters in XMM0-XMM5
/// Additional parameters pushed right-to-left
/// Return value in EAX or XMM0
/// Callee cleans the stack
/// </summary>
Vectorcall,
/// <summary>
/// Unknown calling convention
/// </summary>
Unknown
}

View File

@ -1,190 +0,0 @@
namespace X86Disassembler.Analysers.DecompilerTypes;
/// <summary>
/// Describes a type that can appear in decompiled code
/// </summary>
public class DataType
{
    /// <summary>
    /// Size in bytes given to every pointer type created by <see cref="CreatePointerType"/>
    /// </summary>
    private const int PointerSizeInBytes = 4;

    /// <summary>
    /// The broad kind a data type belongs to
    /// </summary>
    public enum TypeCategory
    {
        /// <summary>
        /// Unknown type
        /// </summary>
        Unknown,

        /// <summary>
        /// Void type (no value)
        /// </summary>
        Void,

        /// <summary>
        /// Integer type
        /// </summary>
        Integer,

        /// <summary>
        /// Floating point type
        /// </summary>
        Float,

        /// <summary>
        /// Pointer type
        /// </summary>
        Pointer,

        /// <summary>
        /// Structure type
        /// </summary>
        Struct,

        /// <summary>
        /// Array type
        /// </summary>
        Array,

        /// <summary>
        /// Function type
        /// </summary>
        Function
    }

    /// <summary>
    /// The type's name as it is printed
    /// </summary>
    public string Name { get; set; } = string.Empty;

    /// <summary>
    /// The kind of type this is
    /// </summary>
    public TypeCategory Category { get; set; }

    /// <summary>
    /// Size of the type in bytes
    /// </summary>
    public int Size { get; set; }

    /// <summary>
    /// Signedness flag (meaningful for integer types)
    /// </summary>
    public bool IsSigned { get; set; }

    /// <summary>
    /// Target type of a pointer type
    /// </summary>
    public DataType? PointedType { get; set; }

    /// <summary>
    /// Element type of an array type
    /// </summary>
    public DataType? ElementType { get; set; }

    /// <summary>
    /// Element count of an array type
    /// </summary>
    public int? ElementCount { get; set; }

    /// <summary>
    /// Fields of a struct type
    /// </summary>
    public List<StructField> Fields { get; set; } = new();

    /// <summary>
    /// Creates a data type from its name, category and size
    /// </summary>
    /// <param name="name">The name of the type</param>
    /// <param name="category">The category of the type</param>
    /// <param name="size">The size of the type in bytes</param>
    public DataType(string name, TypeCategory category, int size)
    {
        (Name, Category, Size) = (name, category, size);
    }

    /// <summary>
    /// Returns the type's name
    /// </summary>
    public override string ToString() => Name;

    /// <summary>
    /// Builds a pointer type named "<c>T*</c>" that points to <paramref name="pointedType"/>
    /// </summary>
    /// <param name="pointedType">The type being pointed to</param>
    /// <returns>A new pointer type with a fixed size of 4 bytes</returns>
    public static DataType CreatePointerType(DataType pointedType)
    {
        var pointer = new DataType($"{pointedType.Name}*", TypeCategory.Pointer, PointerSizeInBytes);
        pointer.PointedType = pointedType;
        return pointer;
    }

    /// <summary>
    /// Builds an array type named "<c>T[count]</c>" whose size is the element size times the count
    /// </summary>
    /// <param name="elementType">The type of the array elements</param>
    /// <param name="count">The number of elements in the array</param>
    /// <returns>A new array type</returns>
    public static DataType CreateArrayType(DataType elementType, int count)
    {
        int totalSize = elementType.Size * count;
        var array = new DataType($"{elementType.Name}[{count}]", TypeCategory.Array, totalSize);
        array.ElementType = elementType;
        array.ElementCount = count;
        return array;
    }

    /// <summary>Predefined "unknown" type (size 0)</summary>
    public static readonly DataType Unknown = new("unknown", TypeCategory.Unknown, 0);

    /// <summary>Predefined "void" type (size 0)</summary>
    public static readonly DataType Void = new("void", TypeCategory.Void, 0);

    /// <summary>Predefined signed 1-byte integer</summary>
    public static readonly DataType Char = new("char", TypeCategory.Integer, 1) { IsSigned = true };

    /// <summary>Predefined unsigned 1-byte integer</summary>
    public static readonly DataType UChar = new("unsigned char", TypeCategory.Integer, 1);

    /// <summary>Predefined signed 2-byte integer</summary>
    public static readonly DataType Short = new("short", TypeCategory.Integer, 2) { IsSigned = true };

    /// <summary>Predefined unsigned 2-byte integer</summary>
    public static readonly DataType UShort = new("unsigned short", TypeCategory.Integer, 2);

    /// <summary>Predefined signed 4-byte integer</summary>
    public static readonly DataType Int = new("int", TypeCategory.Integer, 4) { IsSigned = true };

    /// <summary>Predefined unsigned 4-byte integer</summary>
    public static readonly DataType UInt = new("unsigned int", TypeCategory.Integer, 4);

    /// <summary>Predefined 4-byte floating point type</summary>
    public static readonly DataType Float = new("float", TypeCategory.Float, 4);

    /// <summary>Predefined 8-byte floating point type</summary>
    public static readonly DataType Double = new("double", TypeCategory.Float, 8);
}
/// <summary>
/// One named member of a structure, located by its offset
/// </summary>
public class StructField
{
    /// <summary>
    /// Field name
    /// </summary>
    public string Name { get; set; } = string.Empty;

    /// <summary>
    /// Field type; <see cref="DataType.Unknown"/> until assigned
    /// </summary>
    public DataType Type { get; set; } = DataType.Unknown;

    /// <summary>
    /// Offset of the field inside its structure
    /// </summary>
    public int Offset { get; set; }

    /// <summary>
    /// Builds a structure field from its name, type and offset
    /// </summary>
    /// <param name="name">Field name</param>
    /// <param name="type">Field type</param>
    /// <param name="offset">Offset of the field inside its structure</param>
    public StructField(string name, DataType type, int offset)
    {
        (Name, Type, Offset) = (name, type, offset);
    }
}

View File

@ -1,98 +0,0 @@
using X86Disassembler.X86;
using X86Disassembler.X86.Operands;
namespace X86Disassembler.Analysers.DecompilerTypes;
/// <summary>
/// A function as seen by the decompiler: signature data plus the variables found in it
/// </summary>
public class Function
{
    /// <summary>
    /// Function name
    /// </summary>
    public string Name { get; set; } = string.Empty;

    /// <summary>
    /// Function address
    /// </summary>
    public ulong Address { get; set; }

    /// <summary>
    /// Return type; <see cref="DataType.Void"/> until assigned
    /// </summary>
    public DataType ReturnType { get; set; } = DataType.Void;

    /// <summary>
    /// Function parameters, in the order used by <see cref="GetSignature"/>
    /// </summary>
    public List<Variable> Parameters { get; set; } = new();

    /// <summary>
    /// Local variables of the function
    /// </summary>
    public List<Variable> LocalVariables { get; } = new();

    /// <summary>
    /// Variables that live in registers
    /// </summary>
    public List<Variable> RegisterVariables { get; } = new();

    /// <summary>
    /// Calling convention; cdecl until assigned
    /// </summary>
    public CallingConvention CallingConvention { get; set; } = CallingConvention.Cdecl;

    /// <summary>
    /// The assembly-level function this object was built from
    /// </summary>
    public AsmFunction AsmFunction { get; set; }

    /// <summary>
    /// Builds a function from its name, address and assembly representation
    /// </summary>
    /// <param name="name">Function name</param>
    /// <param name="address">Function address</param>
    /// <param name="asmFunction">The assembly-level function</param>
    public Function(string name, ulong address, AsmFunction asmFunction)
    {
        (Name, Address, AsmFunction) = (name, address, asmFunction);
    }

    /// <summary>
    /// Runs stack-variable analysis for this function
    /// </summary>
    public void AnalyzeVariables()
    {
        // The analyzer works on the context owned by the assembly-level function
        new VariableAnalyzer(AsmFunction.Context).AnalyzeStackVariables(this);
    }

    /// <summary>
    /// Formats the signature as "ReturnType Name(Type1 name1, Type2 name2, ...)"
    /// </summary>
    public string GetSignature()
    {
        var renderedParameters = Parameters.Select(parameter => $"{parameter.Type} {parameter.Name}");
        return $"{ReturnType} {Name}({string.Join(", ", renderedParameters)})";
    }

    /// <summary>
    /// Returns the function signature
    /// </summary>
    public override string ToString() => GetSignature();
}

View File

@ -1,102 +0,0 @@
namespace X86Disassembler.Analysers.DecompilerTypes;
/// <summary>
/// A variable recovered by the decompiler
/// </summary>
public class Variable
{
    /// <summary>
    /// Where a variable is stored
    /// </summary>
    public enum StorageType
    {
        /// <summary>
        /// On the stack (local variable)
        /// </summary>
        Stack,

        /// <summary>
        /// In a register
        /// </summary>
        Register,

        /// <summary>
        /// In global memory
        /// </summary>
        Global,

        /// <summary>
        /// Function parameter passed on the stack
        /// </summary>
        Parameter,

        /// <summary>
        /// Function parameter passed in a register
        /// </summary>
        RegisterParameter
    }

    /// <summary>
    /// Variable name
    /// </summary>
    public string Name { get; set; } = string.Empty;

    /// <summary>
    /// Variable type; <see cref="DataType.Unknown"/> until assigned
    /// </summary>
    public DataType Type { get; set; } = DataType.Unknown;

    /// <summary>
    /// Storage location kind
    /// </summary>
    public StorageType Storage { get; set; }

    /// <summary>
    /// Offset from the base pointer (stack variables)
    /// </summary>
    public int? StackOffset { get; set; }

    /// <summary>
    /// Register holding the variable (register variables)
    /// </summary>
    public X86.RegisterIndex? Register { get; set; }

    /// <summary>
    /// Memory address (global variables)
    /// </summary>
    public ulong? Address { get; set; }

    /// <summary>
    /// Size of the variable in bytes
    /// </summary>
    public int Size { get; set; }

    /// <summary>
    /// True when the variable is a function parameter
    /// </summary>
    public bool IsParameter { get; set; }

    /// <summary>
    /// Index of the parameter, when the variable is one
    /// </summary>
    public int? ParameterIndex { get; set; }

    /// <summary>
    /// Builds a variable from its name and type
    /// </summary>
    /// <param name="name">Variable name</param>
    /// <param name="type">Variable type</param>
    public Variable(string name, DataType type)
    {
        (Name, Type) = (name, type);
    }

    /// <summary>
    /// Formats the variable as "Type Name"
    /// </summary>
    public override string ToString() => $"{Type} {Name}";
}

View File

@ -0,0 +1,56 @@
namespace X86Disassembler.Analysers;
/// <summary>
/// Base for address types: a raw value paired with the image base it relates to
/// </summary>
public abstract class Address
{
    /// <summary>
    /// Raw address value; how it is interpreted is up to the derived type.
    /// </summary>
    protected readonly ulong Value;

    /// <summary>
    /// PE.ImageBase this address was constructed with
    /// </summary>
    protected readonly ulong ImageBase;

    /// <summary>
    /// Stores the raw value and the image base
    /// </summary>
    /// <param name="value">Raw address value</param>
    /// <param name="imageBase">PE.ImageBase this address relates to</param>
    protected Address(ulong value, ulong imageBase)
    {
        Value = value;
        ImageBase = imageBase;
    }
}
/// <summary>
/// Absolute address in the PE file
/// </summary>
public class FileAbsoluteAddress : Address
{
    /// <summary>
    /// Builds a file-absolute address
    /// </summary>
    /// <param name="value">Raw address value</param>
    /// <param name="imageBase">PE.ImageBase this address relates to</param>
    public FileAbsoluteAddress(ulong value, ulong imageBase) : base(value, imageBase)
    {
    }

    /// <summary>
    /// Returns the raw stored value
    /// </summary>
    public ulong GetValue() => Value;

    /// <summary>
    /// Converts to an image-base address by adding the image base to the stored value
    /// </summary>
    public virtual VirtualAddress AsImageBaseAddress() => new(Value + ImageBase, ImageBase);

    /// <summary>
    /// Already file-absolute, so this returns the same instance
    /// </summary>
    public virtual FileAbsoluteAddress AsFileAbsolute() => this;
}
/// <summary>
/// Address from PE.ImageBase
/// </summary>
public class VirtualAddress(ulong value, ulong imageBase) : FileAbsoluteAddress(value, imageBase)
{
    /// <summary>
    /// Already an image-base address, so this returns the same instance
    /// </summary>
    public override VirtualAddress AsImageBaseAddress() => this;

    /// <summary>
    /// Converts to a file-absolute address by subtracting the image base from the stored value
    /// </summary>
    public override FileAbsoluteAddress AsFileAbsolute() => new(Value - ImageBase, ImageBase);
}

View File

@ -1,132 +0,0 @@
using X86Disassembler.Analysers.DecompilerTypes;
using X86Disassembler.X86;
using X86Disassembler.X86.Operands;
namespace X86Disassembler.Analysers;
/// <summary>
/// Analyzes disassembled functions to identify variables, parameters, and control flow structures
/// </summary>
public class FunctionAnalyzer
{
    /// <summary>
    /// The analyzer context shared with the variable analyzer
    /// </summary>
    private readonly AnalyzerContext _context;

    /// <summary>
    /// Creates a new function analyzer
    /// </summary>
    /// <param name="context">The analyzer context</param>
    public FunctionAnalyzer(AnalyzerContext context)
    {
        _context = context;
    }

    /// <summary>
    /// Builds a <see cref="Function"/> for the context's assembly function, then runs
    /// stack-variable analysis, calling-convention detection and type inference on it
    /// </summary>
    /// <param name="address">The address of the function</param>
    /// <param name="name">The name of the function; when null or empty, "func_" plus the address in hex is used</param>
    /// <returns>The analyzed function</returns>
    public Function AnalyzeFunction(ulong address, string name = "")
    {
        if (string.IsNullOrEmpty(name))
        {
            name = $"func_{address:X8}";
        }

        var function = new Function(name, address, _context.Function)
        {
            // Starts as unknown; InferTypes below replaces it
            ReturnType = DataType.Unknown
        };

        var variableAnalyzer = new VariableAnalyzer(_context);
        variableAnalyzer.AnalyzeStackVariables(function);

        DetermineCallingConvention(function);
        InferTypes(function);

        return function;
    }

    /// <summary>
    /// Chooses between cdecl and stdcall by looking at how the function returns
    /// </summary>
    /// <param name="function">The function to analyze</param>
    private void DetermineCallingConvention(Function function)
    {
        // cdecl unless a callee-cleanup return is found
        function.CallingConvention = CallingConvention.Cdecl;

        // A block that ends in "ret imm" with a positive immediate means the function
        // pops its own arguments. One pass over the blocks is enough.
        bool cleansOwnStack = function.AsmFunction.Blocks
            .Select(block => block.Instructions.LastOrDefault())
            .Any(last =>
                last != null &&
                last.Type == InstructionType.Ret &&
                last.StructuredOperands.Count > 0 &&
                last.StructuredOperands[0] is ImmediateOperand immediate &&
                immediate.Value > 0);

        if (cleansOwnStack)
        {
            // Callee cleanup is treated as stdcall. Telling thiscall (ECX as this pointer)
            // apart would need register-usage analysis that is not done here.
            function.CallingConvention = CallingConvention.Stdcall;
        }

        // fastcall (first two parameters in ECX and EDX) is not detected either; it
        // would also need register-usage analysis.
    }

    /// <summary>
    /// Assigns placeholder types: the return type, every parameter and every local variable become int
    /// </summary>
    /// <param name="function">The function to analyze</param>
    private void InferTypes(Function function)
    {
        // No usage tracking is done yet, so everything defaults to int
        function.ReturnType = DataType.Int;

        foreach (var param in function.Parameters)
        {
            param.Type = DataType.Int;
        }

        foreach (var localVar in function.LocalVariables)
        {
            localVar.Type = DataType.Int;
        }
    }
}

View File

@ -1,120 +0,0 @@
namespace X86Disassembler.Analysers;
/// <summary>
/// Analyzes the control flow graph to identify loops
/// </summary>
public class LoopAnalyzer
{
    /// <summary>
    /// Identifies loops in the context's function and records them in the context
    /// </summary>
    /// <param name="context">The analyzer context to read blocks from and store results in</param>
    public void AnalyzeLoops(AnalyzerContext context)
    {
        // Simplified back-edge detection: an edge to a block with a lower address is
        // treated as a back edge, and its target as a loop header. No dominator
        // analysis is performed.
        foreach (var block in context.Function.Blocks)
        {
            foreach (var successor in block.Successors)
            {
                if (successor.Address >= block.Address)
                {
                    continue;
                }

                var loop = new AnalyzerContext.Loop
                {
                    Header = successor,
                    BackEdge = (block, successor)
                };

                FindLoopBlocks(loop);
                FindLoopExits(loop);

                // Keyed by header address: a later back edge to the same header
                // replaces the entry stored for an earlier one
                context.LoopsByHeaderAddress[successor.Address] = loop;

                // Register the loop under every block it contains
                foreach (var loopBlock in loop.Blocks)
                {
                    if (!context.LoopsByBlockAddress.TryGetValue(loopBlock.Address, out var loops))
                    {
                        loops = [];
                        context.LoopsByBlockAddress[loopBlock.Address] = loops;
                    }

                    loops.Add(loop);
                }
            }
        }
    }

    /// <summary>
    /// Collects the blocks of the loop: the header plus every block reached by walking
    /// predecessors from the back edge source until the header is hit
    /// </summary>
    /// <param name="loop">The loop to fill in; its Header and BackEdge must be set</param>
    private void FindLoopBlocks(AnalyzerContext.Loop loop)
    {
        loop.Blocks.Add(loop.Header);

        // The header address is marked visited up front, so the walk never expands the
        // header and therefore never follows its predecessors out of the loop. No
        // separate guard for the header's predecessors is needed.
        var visited = new HashSet<ulong> { loop.Header.Address };

        // Breadth-first walk over predecessors, starting at the back edge source
        var queue = new Queue<InstructionBlock>();
        queue.Enqueue(loop.BackEdge.From);

        while (queue.Count > 0)
        {
            var block = queue.Dequeue();

            // Add returns false for an address that was already seen
            if (!visited.Add(block.Address))
            {
                continue;
            }

            loop.Blocks.Add(block);

            foreach (var predecessor in block.Predecessors)
            {
                queue.Enqueue(predecessor);
            }
        }
    }

    /// <summary>
    /// Records every loop block that has at least one successor outside the loop
    /// </summary>
    /// <param name="loop">The loop to analyze; its Blocks must already be populated</param>
    private void FindLoopExits(AnalyzerContext.Loop loop)
    {
        // Set copy of the loop body so each membership test is a hash lookup instead of
        // a scan over the whole block list
        var members = new HashSet<InstructionBlock>(loop.Blocks);

        foreach (var block in loop.Blocks)
        {
            foreach (var successor in block.Successors)
            {
                if (!members.Contains(successor))
                {
                    loop.ExitBlocks.Add(block);
                    break; // one outside successor is enough to make this block an exit
                }
            }
        }
    }
}

View File

@ -1,862 +0,0 @@
using System.Text;
using X86Disassembler.Analysers.DecompilerTypes;
using X86Disassembler.X86;
using X86Disassembler.X86.Operands;
namespace X86Disassembler.Analysers;
/// <summary>
/// Generates C-like pseudocode from decompiled functions
/// </summary>
public class PseudocodeGenerator
{
/// <summary>
/// Renders a decompiled function as C-like pseudocode text
/// </summary>
/// <param name="function">The function to render</param>
/// <returns>Signature, variable declarations, body and a placeholder return, as one string</returns>
public string GeneratePseudocode(Function function)
{
    var output = new StringBuilder();

    // Signature line and opening brace
    var parameterList = string.Join(", ", function.Parameters.Select(p => $"{p.Type} {p.Name}"));
    output.AppendLine($"{function.ReturnType} {function.Name}({parameterList})");
    output.AppendLine("{");

    // Declarations: stack locals first, then register variables
    foreach (var stackVariable in function.LocalVariables)
    {
        output.AppendLine($" {stackVariable.Type} {stackVariable.Name}; // Stack offset: {stackVariable.StackOffset}");
    }

    foreach (var registerVariable in function.RegisterVariables)
    {
        output.AppendLine($" {registerVariable.Type} {registerVariable.Name}; // Register: {RegisterMapper.GetRegisterName(registerVariable.Register!.Value, 32)}");
    }

    // Blank line between declarations and body, only when something was declared
    bool declaredAnything = function.LocalVariables.Count > 0 || function.RegisterVariables.Count > 0;
    if (declaredAnything)
    {
        output.AppendLine();
    }

    GenerateFunctionBody(function, output, 1);

    // A placeholder return is always emitted, whatever the body contains
    output.AppendLine();
    output.AppendLine(" return 0; // Placeholder return value");
    output.AppendLine("}");

    return output.ToString();
}
/// <summary>
/// Emits the function body by walking blocks from the entry block
/// </summary>
/// <param name="function">The function to generate code for</param>
/// <param name="result">The string builder to append to</param>
/// <param name="indentLevel">The current indentation level (four spaces per level)</param>
private void GenerateFunctionBody(Function function, StringBuilder result, int indentLevel)
{
    var startBlock = function.AsmFunction.EntryBlock;

    if (startBlock == null)
    {
        // Nothing to walk at all: emit a note and stop
        if (function.AsmFunction.Blocks.Count <= 0)
        {
            result.AppendLine($"{new string(' ', indentLevel * 4)}// Function body could not be decompiled - no blocks found");
            return;
        }

        // No entry block recorded: fall back to the first block and say so in the output
        startBlock = function.AsmFunction.Blocks[0];
        result.AppendLine($"{new string(' ', indentLevel * 4)}// Warning: Entry block not found at address 0x{function.Address:X8}, using first block at 0x{startBlock.Address:X8}");
    }

    // A fresh visited set is used for each generated function body
    GenerateBlockCode(function, startBlock, result, indentLevel, new HashSet<ulong>());
}
/// <summary>
/// Emits code for a block, dispatching to the if-else, switch, loop or regular-block generator
/// </summary>
/// <param name="function">The function containing the block</param>
/// <param name="block">The block to generate code for</param>
/// <param name="result">The string builder to append to</param>
/// <param name="indentLevel">The current indentation level</param>
/// <param name="processedBlocks">Addresses of blocks already handled; this block's address is added</param>
private void GenerateBlockCode(Function function, InstructionBlock block, StringBuilder result, int indentLevel, HashSet<ulong> processedBlocks)
{
    // Add returns false when the address is already present, i.e. the block was handled before
    if (!processedBlocks.Add(block.Address))
    {
        return;
    }

    var context = function.AsmFunction.Context;

    // If-else structure whose condition is this block
    var ifElse = context.GetAnalysisData<ControlFlowAnalyzer.IfElseStructure>(block.Address, "IfElseStructure");
    if (ifElse?.ConditionBlock.Address == block.Address)
    {
        GenerateIfElseCode(function, ifElse, result, indentLevel, processedBlocks);
        return;
    }

    // Switch structure whose header is this block
    var switchStructure = context.GetAnalysisData<ControlFlowAnalyzer.SwitchStructure>(block.Address, "SwitchStructure");
    if (switchStructure?.HeaderBlock.Address == block.Address)
    {
        GenerateSwitchCode(function, switchStructure, result, indentLevel, processedBlocks);
        return;
    }

    // Loop whose header is this block. Only the first loop registered for the block is
    // considered. NOTE(review): nothing here shows that the first entry is the innermost
    // loop - confirm against how LoopsByBlockAddress is filled.
    if (context.LoopsByBlockAddress.TryGetValue(block.Address, out var containingLoops) &&
        containingLoops is { Count: > 0 })
    {
        var firstLoop = containingLoops[0];
        if (firstLoop.Header.Address == block.Address)
        {
            GenerateLoopCode(function, firstLoop, result, indentLevel, processedBlocks);
            return;
        }
    }

    // Not the start of any recognized structure
    GenerateRegularBlockCode(function, block, result, indentLevel, processedBlocks);
}
/// <summary>
/// Emits code for a block that is not the start of a recognized structure, then continues with its successors
/// </summary>
/// <param name="function">The function containing the block</param>
/// <param name="block">The block to generate code for</param>
/// <param name="result">The string builder to append to</param>
/// <param name="indentLevel">The current indentation level</param>
/// <param name="processedBlocks">Addresses of blocks already handled</param>
private void GenerateRegularBlockCode(Function function, InstructionBlock block, StringBuilder result, int indentLevel, HashSet<ulong> processedBlocks)
{
    string indent = new string(' ', indentLevel * 4);

    // Writes one instruction as pseudocode. Prologue/epilogue instructions are skipped,
    // and instructions without a translation are kept as a C comment.
    void EmitInstruction(Instruction instruction)
    {
        if (IsPrologueOrEpilogueInstruction(instruction))
        {
            return;
        }

        var pseudocode = GenerateInstructionPseudocode(function, instruction);
        if (string.IsNullOrEmpty(pseudocode))
        {
            result.AppendLine($"{indent}/* {instruction} */;");
        }
        else
        {
            result.AppendLine($"{indent}{pseudocode}");
        }
    }

    // Writes the DEBUG comments describing the trailing conditional jump
    void EmitJumpDiagnostics()
    {
        var jump = block.Instructions[^1];
        result.AppendLine($"{indent}// DEBUG: Conditional jump {jump} detected");

        ulong jumpTarget = GetJumpTargetAddress(jump);
        result.AppendLine($"{indent}// DEBUG: Jump target: 0x{jumpTarget:X8}");

        // Look at no more than the four instructions directly before the jump
        Instruction? comparison = null;
        for (int i = block.Instructions.Count - 2; i >= 0 && i >= block.Instructions.Count - 5; i--)
        {
            var candidate = block.Instructions[i];
            if (candidate.Type == InstructionType.Cmp || candidate.Type == InstructionType.Test)
            {
                comparison = candidate;
                break;
            }
        }

        if (comparison == null)
        {
            result.AppendLine($"{indent}// DEBUG: No comparison instruction found");
        }
        else
        {
            result.AppendLine($"{indent}// DEBUG: Found comparison: {comparison}");
        }
    }

    result.AppendLine($"{indent}// Block at 0x{block.Address:X8}");

    bool endsWithConditionalJump = block.Instructions.Count > 0 &&
                                   IsConditionalJump(block.Instructions[^1].Type);
    if (endsWithConditionalJump)
    {
        EmitJumpDiagnostics();
    }

    // Plain block: every instruction, then each successor that has not been handled yet
    if (!endsWithConditionalJump || block.Successors.Count != 2)
    {
        foreach (var instruction in block.Instructions)
        {
            EmitInstruction(instruction);
        }

        foreach (var successor in block.Successors)
        {
            if (!processedBlocks.Contains(successor.Address))
            {
                GenerateBlockCode(function, successor, result, indentLevel, processedBlocks);
            }
        }

        return;
    }

    // Conditional jump with two successors that was not picked up as an if-else
    // structure: render it as an inline if statement
    var branch = block.Instructions[^1];
    string condition = GenerateConditionFromJump(branch);

    // Everything except the jump itself
    for (int i = 0; i < block.Instructions.Count - 1; i++)
    {
        EmitInstruction(block.Instructions[i]);
    }

    result.AppendLine($"{indent}if ({condition})");
    result.AppendLine($"{indent}{{");

    // True branch: the successor located at the jump target
    var takenAddress = GetJumpTargetAddress(branch);
    var takenBlock = block.Successors.FirstOrDefault(s => s.Address == takenAddress);
    if (takenBlock != null)
    {
        GenerateBlockCode(function, takenBlock, result, indentLevel + 1, processedBlocks);
    }

    result.AppendLine($"{indent}}}");

    // False branch: the first successor that is not at the jump target
    var fallthroughBlock = block.Successors.FirstOrDefault(s => s.Address != takenAddress);
    if (fallthroughBlock != null && !processedBlocks.Contains(fallthroughBlock.Address))
    {
        GenerateBlockCode(function, fallthroughBlock, result, indentLevel, processedBlocks);
    }
}
/// <summary>
/// Emits an if statement, an optional else branch, and the merge block of an if-else structure
/// </summary>
/// <param name="function">The function containing the structure</param>
/// <param name="ifElseStructure">The if-else structure to generate code for</param>
/// <param name="result">The string builder to append to</param>
/// <param name="indentLevel">The current indentation level</param>
/// <param name="processedBlocks">Addresses of blocks already handled; the condition block's address is added</param>
private void GenerateIfElseCode(Function function, ControlFlowAnalyzer.IfElseStructure ifElseStructure, StringBuilder result, int indentLevel, HashSet<ulong> processedBlocks)
{
    processedBlocks.Add(ifElseStructure.ConditionBlock.Address);

    string condition = GenerateConditionExpression(function, ifElseStructure.ConditionBlock);
    string indent = new string(' ', indentLevel * 4);
    int branchLevel = indentLevel + 1;

    // 'then' branch: a nested structure takes priority over the plain block
    result.AppendLine($"{indent}if ({condition})");
    result.AppendLine($"{indent}{{");
    if (ifElseStructure.NestedThenStructure == null)
    {
        GenerateBlockCode(function, ifElseStructure.ThenBlock, result, branchLevel, processedBlocks);
    }
    else
    {
        GenerateIfElseCode(function, ifElseStructure.NestedThenStructure, result, branchLevel, processedBlocks);
    }
    result.AppendLine($"{indent}}}");

    // 'else' branch: only when there is one and it has not been emitted already
    if (ifElseStructure.ElseBlock != null && !processedBlocks.Contains(ifElseStructure.ElseBlock.Address))
    {
        result.AppendLine($"{indent}else");
        result.AppendLine($"{indent}{{");
        if (ifElseStructure.NestedElseStructure == null)
        {
            GenerateBlockCode(function, ifElseStructure.ElseBlock, result, branchLevel, processedBlocks);
        }
        else
        {
            // else-if chain
            GenerateIfElseCode(function, ifElseStructure.NestedElseStructure, result, branchLevel, processedBlocks);
        }
        result.AppendLine($"{indent}}}");
    }

    // Continue at the merge point only for a complete structure whose merge block is still pending
    if (!ifElseStructure.IsComplete ||
        ifElseStructure.MergeBlock == null ||
        processedBlocks.Contains(ifElseStructure.MergeBlock.Address))
    {
        return;
    }

    GenerateBlockCode(function, ifElseStructure.MergeBlock, result, indentLevel, processedBlocks);
}
/// <summary>
/// Emits a switch statement with one case per entry of the structure and an empty default case
/// </summary>
/// <param name="function">The function containing the structure</param>
/// <param name="switchStructure">The switch structure to generate code for</param>
/// <param name="result">The string builder to append to</param>
/// <param name="indentLevel">The current indentation level</param>
/// <param name="processedBlocks">Addresses of blocks already handled; the header block's address is added</param>
private void GenerateSwitchCode(Function function, ControlFlowAnalyzer.SwitchStructure switchStructure, StringBuilder result, int indentLevel, HashSet<ulong> processedBlocks)
{
    // The switched-on expression is not reconstructed; a placeholder is printed instead
    const string switchExpression = "/* switch expression */";

    processedBlocks.Add(switchStructure.HeaderBlock.Address);

    string indent = new string(' ', indentLevel * 4);

    result.AppendLine($"{indent}// Switch structure at 0x{switchStructure.HeaderBlock.Address:X8}");
    result.AppendLine($"{indent}switch ({switchExpression})");
    result.AppendLine($"{indent}{{");
    result.AppendLine();

    foreach (var switchCase in switchStructure.Cases)
    {
        result.AppendLine($"{indent} case {switchCase.Value}:");
        result.AppendLine($"{indent} // Case block at 0x{switchCase.CaseBlock.Address:X8}");

        // Case bodies are generated two levels deeper than the switch keyword
        GenerateBlockCode(function, switchCase.CaseBlock, result, indentLevel + 2, processedBlocks);

        result.AppendLine($"{indent} break;");
        result.AppendLine();
    }

    // A default case is always emitted, with no body of its own
    result.AppendLine($"{indent} default:");
    result.AppendLine($"{indent} // Default case");
    result.AppendLine($"{indent} break;");

    result.AppendLine($"{indent}}}");
}
/// <summary>
/// Emits a "while (true)" loop whose body starts with the loop header block
/// </summary>
/// <param name="function">The function containing the structure</param>
/// <param name="loop">The loop to generate code for</param>
/// <param name="result">The string builder to append to</param>
/// <param name="indentLevel">The current indentation level</param>
/// <param name="processedBlocks">Addresses of blocks already handled; the header's address is added</param>
private void GenerateLoopCode(Function function, AnalyzerContext.Loop loop, StringBuilder result, int indentLevel, HashSet<ulong> processedBlocks)
{
    // Mark the header block as processed so the back edge does not re-enter the loop
    processedBlocks.Add(loop.Header.Address);

    string indent = new string(' ', indentLevel * 4);

    // The real loop condition is not reconstructed; the loop is always printed as while (true)
    result.AppendLine($"{indent}// Loop at 0x{loop.Header.Address:X8}")
        .AppendLine($"{indent}while (true) // Simplified loop condition");

    result.AppendLine($"{indent}{{")
        .AppendLine($"{indent} // Loop body");

    // The header was marked as processed above, so passing it to GenerateBlockCode would
    // return at once and leave the loop body empty. Emit it as a regular block instead.
    // Its successors still go through GenerateBlockCode, which skips processed blocks,
    // so the walk stops when the back edge leads to the header again.
    GenerateRegularBlockCode(function, loop.Header, result, indentLevel + 1, processedBlocks);

    result.AppendLine($"{indent}}}");
}
/// <summary>
/// Builds the condition text for an if statement from the conditional jump that ends a block
/// </summary>
/// <param name="function">The function containing the block (not used by this method)</param>
/// <param name="conditionBlock">The block whose last instruction is expected to be a conditional jump</param>
/// <returns>The condition text, or the placeholder "condition" when the block is empty or does not end in a conditional jump</returns>
private string GenerateConditionExpression(Function function, InstructionBlock conditionBlock)
{
    var instructions = conditionBlock.Instructions;

    if (instructions.Count == 0)
    {
        return "condition";
    }

    var jump = instructions[^1];
    if (!IsConditionalJump(jump.Type))
    {
        return "condition";
    }

    // Walk backwards from the jump; the nearest CMP or TEST is taken as the flag setter
    for (int index = instructions.Count - 2; index >= 0; index--)
    {
        var candidate = instructions[index];
        if (candidate.Type != InstructionType.Cmp && candidate.Type != InstructionType.Test)
        {
            continue;
        }

        if (candidate.StructuredOperands.Count >= 2)
        {
            var left = FormatOperand(candidate.StructuredOperands[0]);
            var right = FormatOperand(candidate.StructuredOperands[1]);
            return GenerateConditionFromJump(jump, left, right);
        }

        // Nearest comparison has fewer than two operands: stop searching and use the jump alone
        break;
    }

    // No usable comparison found
    return GenerateConditionFromJump(jump, null, null);
}
/// <summary>
/// Translates a single instruction into one line of pseudocode
/// </summary>
/// <remarks>
/// Recognised instructions that carry fewer operands than they need yield an empty string;
/// instruction types that are not recognised at all are emitted as a comment.
/// </remarks>
/// <param name="function">The function containing the instruction (not used by this method)</param>
/// <param name="instruction">The instruction to generate pseudocode for</param>
/// <returns>The generated pseudocode</returns>
private string GenerateInstructionPseudocode(Function function, Instruction instruction)
{
    var operands = instruction.StructuredOperands;
    bool hasOne = operands.Count >= 1;
    bool hasTwo = operands.Count >= 2;

    // Operands are formatted lazily so only the ones that end up in the output are touched
    string First() => FormatOperand(operands[0]);
    string Second() => FormatOperand(operands[1]);

    // True when both operands name the same register (e.g. "xor eax, eax")
    bool SameRegisterTwice() =>
        operands[0] is RegisterOperand firstRegister &&
        operands[1] is RegisterOperand secondRegister &&
        firstRegister.Register == secondRegister.Register;

    bool SecondIsZero() => operands[1] is ImmediateOperand zeroCandidate && zeroCandidate.Value == 0;
    bool SecondIsOne() => operands[1] is ImmediateOperand oneCandidate && oneCandidate.Value == 1;

    return instruction.Type switch
    {
        // XOR of a register with itself is the usual idiom for clearing it
        InstructionType.Xor when hasTwo && SameRegisterTwice()
            => $"{First()} = 0; // XOR with self to zero register",
        InstructionType.Xor when hasTwo
            => $"{First()} ^= {Second()};",

        // Moving an immediate 0 is a common initialization pattern
        InstructionType.Mov when hasTwo && SecondIsZero()
            => $"{First()} = 0; // Initialize to zero",
        InstructionType.Mov when hasTwo
            => $"{First()} = {Second()};",

        // Adding / subtracting an immediate 1 reads better as increment / decrement
        InstructionType.Add when hasTwo && SecondIsOne()
            => $"{First()}++; // Increment",
        InstructionType.Add when hasTwo
            => $"{First()} += {Second()};",
        InstructionType.Sub when hasTwo && SecondIsOne()
            => $"{First()}--; // Decrement",
        InstructionType.Sub when hasTwo
            => $"{First()} -= {Second()};",

        InstructionType.And when hasTwo
            => $"{First()} &= {Second()};",
        InstructionType.Or when hasTwo
            => $"{First()} |= {Second()};",

        // TEST and CMP only set flags, so they are rendered as explanatory comments
        InstructionType.Test when hasTwo && SameRegisterTwice()
            => $"// Check if {First()} is zero",
        InstructionType.Test when hasTwo
            => $"// Test {First()} & {Second()}",
        InstructionType.Cmp when hasTwo
            => $"// Compare {First()} with {Second()}",

        InstructionType.Call when hasOne
            => $"{First()}(); // Function call",
        InstructionType.Ret
            => "return 0; // Placeholder return value",
        InstructionType.Push when hasOne
            => $"// Push {First()} onto stack",
        InstructionType.Pop when hasOne
            => $"{First()} = pop(); // Pop from stack",
        InstructionType.Inc when hasOne
            => $"{First()}++;",
        InstructionType.Dec when hasOne
            => $"{First()}--;",

        InstructionType.Shl when hasTwo
            => $"{First()} <<= {Second()};",
        InstructionType.Shr when hasTwo
            => $"{First()} >>>= {Second()}; // Logical shift right",
        InstructionType.Sar when hasTwo
            => $"{First()} >>= {Second()}; // Arithmetic shift right",

        // A recognised instruction without enough operands produces no output
        InstructionType.Mov or InstructionType.Add or InstructionType.Sub or
        InstructionType.And or InstructionType.Or or InstructionType.Xor or
        InstructionType.Test or InstructionType.Cmp or InstructionType.Call or
        InstructionType.Push or InstructionType.Pop or InstructionType.Inc or
        InstructionType.Dec or InstructionType.Shl or InstructionType.Shr or
        InstructionType.Sar
            => string.Empty,

        // Anything else is kept as a comment holding the instruction text
        _ => $"// {instruction}",
    };
}
/// <summary>
/// Renders an operand as it should appear in pseudocode
/// </summary>
/// <remarks>
/// Registers are printed by their 32-bit name, immediates as hexadecimal literals and
/// register-based memory operands as pointer dereferences. Any other operand kind falls back
/// to its own <c>ToString()</c>.
/// </remarks>
/// <param name="operand">The operand to format</param>
/// <returns>A string representation of the operand</returns>
private string FormatOperand(Operand operand)
{
    // All register names are rendered at this width
    const int registerBits = 32;

    // Plain register
    if (operand is RegisterOperand register)
    {
        return RegisterMapper.GetRegisterName(register.Register, registerBits);
    }

    // Immediate value
    if (operand is ImmediateOperand immediate)
    {
        return $"0x{immediate.Value:X}";
    }

    // Memory at base register plus displacement
    if (operand is DisplacementMemoryOperand displaced)
    {
        return $"*({RegisterMapper.GetRegisterName(displaced.BaseRegister, registerBits)} + 0x{displaced.Displacement:X})";
    }

    // Memory at base register
    if (operand is BaseRegisterMemoryOperand based)
    {
        return $"*({RegisterMapper.GetRegisterName(based.BaseRegister, registerBits)})";
    }

    // Unknown operand kinds describe themselves
    return operand.ToString();
}
/// <summary>
/// Tells whether an instruction belongs to the function prologue or epilogue
/// </summary>
/// <remarks>
/// The recognised forms are <c>push ebp</c>, <c>mov ebp, esp</c>, <c>pop ebp</c> and <c>ret</c>.
/// </remarks>
/// <param name="instruction">The instruction to check</param>
/// <returns>True if the instruction is part of the prologue or epilogue, false otherwise</returns>
private bool IsPrologueOrEpilogueInstruction(Instruction instruction)
{
    var operands = instruction.StructuredOperands;

    // True when the operand at the given position exists and is the expected register
    bool IsRegisterAt(int position, RegisterIndex expected) =>
        operands.Count > position &&
        operands[position] is RegisterOperand candidate &&
        candidate.Register == expected;

    switch (instruction.Type)
    {
        case InstructionType.Ret:
            // ret
            return true;

        case InstructionType.Push:
        case InstructionType.Pop:
            // push ebp / pop ebp
            return IsRegisterAt(0, RegisterIndex.Bp);

        case InstructionType.Mov:
            // mov ebp, esp
            return IsRegisterAt(0, RegisterIndex.Bp) && IsRegisterAt(1, RegisterIndex.Sp);

        default:
            return false;
    }
}
/// <summary>
/// Tells whether an instruction type is one of the conditional jumps known to the decompiler
/// </summary>
/// <param name="type">The instruction type</param>
/// <returns>True if the instruction is a conditional jump, false otherwise</returns>
private bool IsConditionalJump(InstructionType type) => type switch
{
    // Zero-flag, signed, unsigned, overflow and sign-flag based jumps
    InstructionType.Jz or InstructionType.Jnz or
    InstructionType.Jg or InstructionType.Jge or
    InstructionType.Jl or InstructionType.Jle or
    InstructionType.Ja or InstructionType.Jae or
    InstructionType.Jb or InstructionType.Jbe or
    InstructionType.Jo or InstructionType.Jno or
    InstructionType.Js or InstructionType.Jns => true,

    _ => false,
};
/// <summary>
/// Reads the target address of a jump instruction from its first operand
/// </summary>
/// <param name="instruction">The jump instruction</param>
/// <returns>The value of the first operand, or 0 when the instruction has no operands</returns>
private ulong GetJumpTargetAddress(Instruction instruction)
{
    var operands = instruction.StructuredOperands;

    // No operand means the target is unknown, which is reported as 0
    return operands.Count > 0
        ? operands[0].GetValue()
        : 0;
}
/// <summary>
/// Builds a condition expression from a conditional jump instruction
/// </summary>
/// <remarks>
/// When either comparison operand is missing, a description of the flag state is returned
/// instead of an expression over the operands.
/// </remarks>
/// <param name="instruction">The conditional jump instruction</param>
/// <param name="left">The left operand of the comparison, if available</param>
/// <param name="right">The right operand of the comparison, if available</param>
/// <returns>A string representing the condition expression</returns>
private string GenerateConditionFromJump(Instruction instruction, string? left = null, string? right = null)
{
    // Both operands are required before a specific expression can be produced
    bool hasOperands = left is not null && right is not null;

    return instruction.Type switch
    {
        InstructionType.Jz => hasOperands ? $"{left} == 0" : "zero flag is set",
        InstructionType.Jnz => hasOperands ? $"{left} != 0" : "zero flag is not set",

        // Other jumps are not translated into a concrete operator
        _ => hasOperands ? $"{left} ? {right}" : "condition",
    };
}
}

View File

@ -1,252 +0,0 @@
using X86Disassembler.Analysers.DecompilerTypes;
using X86Disassembler.X86;
using X86Disassembler.X86.Operands;
namespace X86Disassembler.Analysers;
/// <summary>
/// Analyzes disassembled code to identify and track variables
/// </summary>
public class VariableAnalyzer
{
    /// <summary>
    /// Offset of the first parameter relative to EBP
    /// (saved EBP is at [ebp+0], the return address at [ebp+4])
    /// </summary>
    private const int FirstParameterOffset = 8;

    /// <summary>
    /// Assumed size in bytes of every parameter and local variable (32-bit)
    /// </summary>
    private const int StackSlotSize = 4;

    /// <summary>
    /// Exclusive bound on the absolute EBP-relative offsets that are treated as variables
    /// </summary>
    private const int MaxStackOffset = 1000;

    /// <summary>
    /// The analyzer context
    /// </summary>
    private readonly AnalyzerContext _context;

    /// <summary>
    /// Creates a new variable analyzer
    /// </summary>
    /// <param name="context">The analyzer context</param>
    public VariableAnalyzer(AnalyzerContext context)
    {
        _context = context;
    }

    /// <summary>
    /// Analyzes the function to identify stack variables
    /// </summary>
    /// <remarks>
    /// Nothing is recorded unless both <c>push ebp</c> and <c>mov ebp, esp</c> are found.
    /// Accesses at [ebp+8] and above extend <c>function.Parameters</c>, accesses below EBP
    /// add entries to <c>function.LocalVariables</c>. Register variables are analyzed afterwards.
    /// </remarks>
    /// <param name="function">The function to analyze</param>
    public void AnalyzeStackVariables(Function function)
    {
        // If we didn't find a standard prologue, we can't reliably analyze stack variables
        if (!HasStandardPrologue(function))
        {
            return;
        }

        // Dictionary to track stack offsets and their corresponding variables
        var stackOffsets = new Dictionary<int, Variable>();

        // Now scan for memory accesses relative to EBP
        foreach (var block in function.AsmFunction.Blocks)
        {
            foreach (var instruction in block.Instructions)
            {
                // Look for memory operands that reference [ebp+X] or [ebp-X]
                foreach (var operand in instruction.StructuredOperands)
                {
                    if (operand is DisplacementMemoryOperand memOp &&
                        memOp.BaseRegister == RegisterIndex.Bp)
                    {
                        // This is accessing memory relative to EBP
                        int offset = (int)memOp.Displacement;

                        // Parameters start at [ebp+8]. Offsets 1..7 address the saved EBP and the
                        // return address; they must not be treated as parameters (integer division
                        // would otherwise map offsets 5..7 to parameter index 0).
                        if (offset >= FirstParameterOffset && offset < MaxStackOffset)
                        {
                            int paramIndex = (offset - FirstParameterOffset) / StackSlotSize; // Assuming 4-byte parameters

                            // Make sure we have enough parameters in the function
                            while (function.Parameters.Count <= paramIndex)
                            {
                                var param = new Variable($"param_{function.Parameters.Count + 1}", DataType.Unknown)
                                {
                                    Storage = Variable.StorageType.Parameter,
                                    StackOffset = FirstParameterOffset + (function.Parameters.Count * StackSlotSize),
                                    IsParameter = true,
                                    ParameterIndex = function.Parameters.Count,
                                    Size = StackSlotSize
                                };
                                function.Parameters.Add(param);
                            }
                        }
                        else if (offset < 0 && offset > -MaxStackOffset) // Negative offset = local variable (with reasonable limit)
                        {
                            // Check if we've already seen this offset
                            if (!stackOffsets.TryGetValue(offset, out var variable))
                            {
                                // Create a new local variable
                                variable = new Variable($"local_{Math.Abs(offset)}", DataType.Unknown)
                                {
                                    Storage = Variable.StorageType.Stack,
                                    StackOffset = offset,
                                    Size = StackSlotSize
                                };

                                // Remember it so that later accesses reuse the same variable
                                stackOffsets[offset] = variable;
                                function.LocalVariables.Add(variable);
                            }

                            // Track the usage of this variable
                            TrackVariableUsage(variable, instruction);
                        }
                    }
                }
            }
        }

        // Analyze register-based variables
        AnalyzeRegisterVariables(function);
    }

    /// <summary>
    /// Looks for the prologue pattern: push ebp; mov ebp, esp; sub esp, X
    /// </summary>
    /// <param name="function">The function to inspect</param>
    /// <returns>True when both <c>push ebp</c> and <c>mov ebp, esp</c> were found</returns>
    private static bool HasStandardPrologue(Function function)
    {
        bool hasPushEbp = false;
        bool hasMovEbpEsp = false;
        int localSize = 0;

        foreach (var block in function.AsmFunction.Blocks)
        {
            foreach (var instruction in block.Instructions)
            {
                // Look for push ebp
                if (instruction.Type == InstructionType.Push &&
                    instruction.StructuredOperands.Count > 0 &&
                    instruction.StructuredOperands[0] is RegisterOperand regOp &&
                    regOp.Register == RegisterIndex.Bp)
                {
                    hasPushEbp = true;
                    continue;
                }

                // Look for mov ebp, esp
                if (instruction.Type == InstructionType.Mov &&
                    instruction.StructuredOperands.Count > 1 &&
                    instruction.StructuredOperands[0] is RegisterOperand destReg &&
                    instruction.StructuredOperands[1] is RegisterOperand srcReg &&
                    destReg.Register == RegisterIndex.Bp &&
                    srcReg.Register == RegisterIndex.Sp)
                {
                    hasMovEbpEsp = true;
                    continue;
                }

                // Look for sub esp, X to determine local variable space;
                // the rest of this block is not inspected once it is found
                if (instruction.Type == InstructionType.Sub &&
                    instruction.StructuredOperands.Count > 1 &&
                    instruction.StructuredOperands[0] is RegisterOperand subReg &&
                    instruction.StructuredOperands[1] is ImmediateOperand immOp &&
                    subReg.Register == RegisterIndex.Sp)
                {
                    localSize = (int)immOp.Value;
                    break;
                }
            }

            // If we found the complete prologue, no need to check more blocks
            if (hasPushEbp && hasMovEbpEsp && localSize > 0)
            {
                break;
            }
        }

        return hasPushEbp && hasMovEbpEsp;
    }

    /// <summary>
    /// Analyzes register usage to identify variables stored in registers
    /// </summary>
    /// <remarks>
    /// A register other than ESP/EBP becomes a variable the first time the per-block
    /// "RegisterValues" analysis data reports a constant value for it.
    /// </remarks>
    /// <param name="function">The function to analyze</param>
    private void AnalyzeRegisterVariables(Function function)
    {
        // Key under which register values are looked up in the analyzer context
        const string registerValuesKey = "RegisterValues";

        // Dictionary to track register variables
        var registerVariables = new Dictionary<RegisterIndex, Variable>();

        // For each block, analyze register usage
        foreach (var block in function.AsmFunction.Blocks)
        {
            // Check if we have register values for this block from data flow analysis
            if (_context.GetAnalysisData<Dictionary<RegisterIndex, DataFlowAnalyzer.ValueInfo>>(block.Address, registerValuesKey) is Dictionary<RegisterIndex, DataFlowAnalyzer.ValueInfo> registerValues)
            {
                foreach (var kvp in registerValues)
                {
                    var register = kvp.Key;
                    var valueInfo = kvp.Value;

                    // Skip special registers like ESP and EBP
                    if (register == RegisterIndex.Sp || register == RegisterIndex.Bp)
                    {
                        continue;
                    }

                    // If the register holds a constant value, it might be a variable
                    if (valueInfo.Type == DataFlowAnalyzer.ValueInfo.ValueType.Constant)
                    {
                        // Check if we already have a variable for this register
                        if (!registerVariables.TryGetValue(register, out var variable))
                        {
                            // Create a new register variable
                            variable = new Variable($"reg_{RegisterMapper.GetRegisterName(register, 32)}", DataType.Unknown)
                            {
                                Storage = Variable.StorageType.Register,
                                Register = register,
                                Size = StackSlotSize
                            };

                            // Add to our tracking dictionary
                            registerVariables[register] = variable;
                            function.RegisterVariables.Add(variable);
                        }
                    }
                }
            }
        }
    }

    /// <summary>
    /// Tracks how a variable is used in an instruction
    /// </summary>
    /// <remarks>
    /// Currently this only infers a type for variables whose type is still unknown.
    /// </remarks>
    /// <param name="variable">The variable to track</param>
    /// <param name="instruction">The instruction using the variable</param>
    private void TrackVariableUsage(Variable variable, Instruction instruction)
    {
        // If the variable is used in a comparison with 0, it might be a boolean
        if (instruction.Type == InstructionType.Cmp || instruction.Type == InstructionType.Test)
        {
            if (instruction.StructuredOperands.Count > 1 &&
                instruction.StructuredOperands[1] is ImmediateOperand immOp &&
                immOp.Value == 0)
            {
                // This might be a boolean check
                if (variable.Type == DataType.Unknown)
                {
                    // Set to int for now as we don't have a bool type
                    variable.Type = DataType.Int;
                }
            }
        }

        // String detection is skipped for now: there are no specific string instruction types yet

        // If the variable is used with floating-point instructions, it might be a float
        if (instruction.Type == InstructionType.Fld ||
            instruction.Type == InstructionType.Fst ||
            instruction.Type == InstructionType.Fstp)
        {
            if (variable.Type == DataType.Unknown)
            {
                variable.Type = DataType.Float;
            }
        }
    }
}

View File

@ -1,5 +1,6 @@
using X86Disassembler.Analysers;
using X86Disassembler.PE;
using X86Disassembler.ProjectSystem;
using X86Disassembler.X86;
namespace X86Disassembler;
@ -49,6 +50,16 @@ public class Program
// Print import information
PrintPeImports(peFile);
var projectPeFile = new ProjectPeFile()
{
ImageBase = new VirtualAddress(0, peFile.OptionalHeader.ImageBase),
Architecture = peFile.OptionalHeader.Is64Bit()
? "64-bit"
: "32-bit",
Name = Path.GetFileName(FilePath),
EntryPointAddress = new FileAbsoluteAddress(peFile.OptionalHeader.AddressOfEntryPoint, peFile.OptionalHeader.ImageBase)
};
// Find code sections
var codeSections = peFile.SectionHeaders.FindAll(s => s.ContainsCode());
Console.WriteLine($"Found {codeSections.Count} code section(s):");
@ -56,72 +67,32 @@ public class Program
{
Console.WriteLine($" - {section.Name}: Size={section.VirtualSize} bytes, RVA=0x{section.VirtualAddress:X8}");
}
Console.WriteLine();
var projectPeFileSections = peFile.SectionHeaders.Select(
x => new ProjectPeFileSection()
{
Name = x.Name,
Flags = (x.ContainsCode() ? SectionFlags.Code : SectionFlags.None) |
(x.IsReadable() ? SectionFlags.Read : SectionFlags.None) |
(x.IsWritable() ? SectionFlags.Write : SectionFlags.None) |
(x.IsExecutable() ? SectionFlags.Exec : SectionFlags.None) ,
VirtualAddress = new VirtualAddress(x.VirtualAddress, peFile.OptionalHeader.ImageBase),
Size = x.VirtualSize
}
).ToList();
// Disassemble the first code section
if (codeSections.Count > 0)
{
var section = codeSections[0];
byte[] codeBytes = peFile.GetSectionData(peFile.SectionHeaders.IndexOf(section));
// // First demonstrate sequential disassembly
// Console.WriteLine($"Sequential disassembly of section {section.Name} at RVA 0x{section.VirtualAddress:X8}:");
//
// // Create a disassembler for the code section
// // Base address should be the section's virtual address, not the image base + VA
// Disassembler disassembler = new Disassembler(codeBytes, section.VirtualAddress);
//
// // Disassemble sequentially (linear approach)
// var linearInstructions = disassembler.Disassemble();
//
// // Print the first 30 instructions from linear disassembly
// int linearCount = Math.Min(30, linearInstructions.Count);
// for (int i = 0; i < linearCount; i++)
// {
// Console.WriteLine(linearInstructions[i]);
// }
//
// disassemble entry point
var disassembler = new BlockDisassembler(codeBytes, section.VirtualAddress);
var asmFunction = disassembler.DisassembleFromAddress(peFile.OptionalHeader.AddressOfEntryPoint);
// Run all analyzers on the function
asmFunction.Analyze();
// Create a decompiler engine
var decompiler = new DecompilerEngine(peFile);
try
{
// Find a suitable exported function to decompile
// Let's try to find a function that might have more complex control flow
var exportedFunctions = peFile.ExportedFunctions;
// Print all exported functions to help us choose a better one
Console.WriteLine("Available exported functions:");
foreach (var func in exportedFunctions)
{
Console.WriteLine($" - {func.Name} (RVA=0x{func.AddressRva:X8})");
}
// Decompile the entry point function
Console.WriteLine($"\nDecompiling entry point function at address 0x{peFile.OptionalHeader.AddressOfEntryPoint:X8}\n");
// Decompile the entry point function
var function = decompiler.DecompileFunction(peFile.OptionalHeader.AddressOfEntryPoint);
// Generate pseudocode
var pseudocode = decompiler.GeneratePseudocode(function);
Console.WriteLine("\nGenerated Pseudocode:\n");
Console.WriteLine(pseudocode);
}
catch (Exception ex)
{
Console.WriteLine($"Error decompiling function: {ex.Message}");
}
// Skip displaying detailed loop information to keep output concise
Console.WriteLine(asmFunction);
}
// Console.WriteLine("\nPress Enter to exit...");
@ -150,6 +121,7 @@ public class Program
}
}
}
Console.WriteLine();
}
@ -165,6 +137,7 @@ public class Program
var export = peFile.ExportedFunctions[i];
Console.WriteLine($" {i}: {export.Name} (Ordinal={export.Ordinal}, RVA=0x{export.AddressRva:X8})");
}
Console.WriteLine();
}
@ -181,6 +154,7 @@ public class Program
Console.WriteLine($" {peFile.SectionHeaders.IndexOf(section)}: {section.Name,-8} VA=0x{section.VirtualAddress:X8} Size={section.VirtualSize,-8} [{flags}]");
}
Console.WriteLine();
}
}

View File

@ -0,0 +1,35 @@
using X86Disassembler.Analysers;
namespace X86Disassembler.ProjectSystem;
/// <summary>
/// Project-level description of a loaded PE file
/// </summary>
public class ProjectPeFile
{
    /// <summary>
    /// Name of the file
    /// </summary>
    public string Name { get; set; } = string.Empty;

    /// <summary>
    /// Textual description of the target architecture (for example "32-bit" or "64-bit")
    /// </summary>
    public string Architecture { get; set; } = string.Empty;

    /// <summary>
    /// Address of the entry point; expected to be assigned by the creator of the object
    /// </summary>
    public Address EntryPointAddress { get; set; } = null!;

    /// <summary>
    /// Image base of the file; expected to be assigned by the creator of the object
    /// </summary>
    public Address ImageBase { get; set; } = null!;
}
/// <summary>
/// Project-level description of a single section of a PE file
/// </summary>
public class ProjectPeFileSection
{
    /// <summary>
    /// Name of the section
    /// </summary>
    public string Name { get; set; } = string.Empty;

    /// <summary>
    /// Virtual address of the section; expected to be assigned by the creator of the object
    /// </summary>
    public Address VirtualAddress { get; set; } = null!;

    /// <summary>
    /// Size of the section
    /// </summary>
    public ulong Size { get; set; }

    /// <summary>
    /// Combination of flags describing the section
    /// </summary>
    public SectionFlags Flags { get; set; }
}
/// <summary>
/// Bit flags describing the properties of a section; values may be combined
/// </summary>
[Flags]
public enum SectionFlags
{
    /// <summary>No flag set</summary>
    None = 0,

    /// <summary>The section contains code</summary>
    Code = 1 << 0,

    /// <summary>The section is executable</summary>
    Exec = 1 << 1,

    /// <summary>The section is readable</summary>
    Read = 1 << 2,

    /// <summary>The section is writable</summary>
    Write = 1 << 3
}