diff --git a/X86Disassembler/Analysers/AnalyzerContext.cs b/X86Disassembler/Analysers/AnalyzerContext.cs
deleted file mode 100644
index 18f2b1f..0000000
--- a/X86Disassembler/Analysers/AnalyzerContext.cs
+++ /dev/null
@@ -1,125 +0,0 @@
-namespace X86Disassembler.Analysers;
-
-///
-/// Central context for all analysis data related to a disassembled function
-///
-public class AnalyzerContext
-{
- ///
- /// The function being analyzed
- ///
- public AsmFunction Function { get; }
-
- ///
- /// Dictionary mapping block addresses to instruction blocks
- ///
- public Dictionary BlocksByAddress { get; } = [];
-
- ///
- /// Dictionary mapping loop header addresses to loops
- ///
- public Dictionary LoopsByHeaderAddress { get; } = [];
-
- ///
- /// Dictionary mapping block addresses to the loops that contain them
- ///
- public Dictionary> LoopsByBlockAddress { get; } = [];
-
- ///
- /// Dictionary for storing arbitrary analysis data by address
- ///
- public Dictionary> AnalysisDataByAddress { get; } = [];
-
- ///
- /// Creates a new analyzer context for the given function
- ///
- /// The function to analyze
- public AnalyzerContext(AsmFunction function)
- {
- Function = function;
-
- // Initialize the block dictionary
- foreach (var block in function.Blocks)
- {
- BlocksByAddress[block.Address] = block;
- }
- }
-
- ///
- /// Represents a loop in the control flow graph
- ///
- public class Loop
- {
- ///
- /// The header block of the loop (the entry point into the loop)
- ///
- public InstructionBlock Header { get; set; } = null!;
-
- ///
- /// The blocks that are part of this loop
- ///
- public List Blocks { get; set; } = [];
-
- ///
- /// The back edge that completes the loop (from a block back to the header)
- ///
- public (InstructionBlock From, InstructionBlock To) BackEdge { get; set; }
-
- ///
- /// The exit blocks of the loop (blocks that have successors outside the loop)
- ///
- public List ExitBlocks { get; set; } = [];
- }
-
- ///
- /// Stores analysis data for a specific address
- ///
- /// The address to store data for
- /// The key for the data
- /// The data to store
- public void StoreAnalysisData(ulong address, string key, object value)
- {
- if (!AnalysisDataByAddress.TryGetValue(address, out var dataDict))
- {
- dataDict = [];
- AnalysisDataByAddress[address] = dataDict;
- }
-
- dataDict[key] = value;
- }
-
- ///
- /// Retrieves analysis data for a specific address
- ///
- /// The address to retrieve data for
- /// The key for the data
- /// The stored data, or null if not found
- public object? GetAnalysisData(ulong address, string key)
- {
- if (AnalysisDataByAddress.TryGetValue(address, out var dataDict) &&
- dataDict.TryGetValue(key, out var value))
- {
- return value;
- }
-
- return null;
- }
-
- ///
- /// Retrieves typed analysis data for a specific address
- ///
- /// The type of data to retrieve
- /// The address to retrieve data for
- /// The key for the data
- /// The stored data, or default(T) if not found or wrong type
- public T? GetAnalysisData(ulong address, string key)
- {
- var data = GetAnalysisData(address, key);
- if (data is T typedData)
- {
- return typedData;
- }
-
- return default;
- }
-}
diff --git a/X86Disassembler/Analysers/AsmFunction.cs b/X86Disassembler/Analysers/AsmFunction.cs
index 12b3e6f..56803bc 100644
--- a/X86Disassembler/Analysers/AsmFunction.cs
+++ b/X86Disassembler/Analysers/AsmFunction.cs
@@ -15,73 +15,8 @@ public class AsmFunction
///
public List Blocks { get; set; } = [];
- ///
- /// The entry block of the function
- ///
- public InstructionBlock? EntryBlock => Blocks.FirstOrDefault(b => b.Address == Address);
-
- ///
- /// The exit blocks of the function (blocks that end with a return instruction)
- ///
- public List ExitBlocks => Blocks.Where(b =>
- b.Instructions.Count > 0 &&
- b.Instructions[^1].Type.IsRet()).ToList();
-
- ///
- /// The analyzer context for this function
- ///
- public AnalyzerContext Context { get; private set; }
-
- ///
- /// Creates a new AsmFunction instance
- ///
- public AsmFunction()
- {
- Context = new AnalyzerContext(this);
- }
-
- ///
- /// Analyzes the function using various analyzers
- ///
- public void Analyze()
- {
- // Analyze loops
- var loopAnalyzer = new LoopAnalyzer();
- loopAnalyzer.AnalyzeLoops(Context);
-
- // Analyze data flow
- var dataFlowAnalyzer = new DataFlowAnalyzer();
- dataFlowAnalyzer.AnalyzeDataFlow(Context);
- }
-
- ///
- /// Returns a string representation of the function, including its address, blocks, and analysis results
- ///
public override string ToString()
{
- string loopsInfo = "";
- if (Context.LoopsByHeaderAddress.Count > 0)
- {
- loopsInfo = $"Loops: {Context.LoopsByHeaderAddress.Count}\n";
- int i = 0;
- foreach (var loop in Context.LoopsByHeaderAddress.Values)
- {
- loopsInfo += $" Loop {i++}: Header=0x{loop.Header.Address:X8}, " +
- $"Blocks={loop.Blocks.Count}, " +
- $"Back Edge=(0x{loop.BackEdge.From.Address:X8} -> 0x{loop.BackEdge.To.Address:X8}), " +
- $"Exits={loop.ExitBlocks.Count}\n";
- }
- }
- else
- {
- loopsInfo = "Loops: None\n";
- }
-
- return $"Function at 0x{Address:X8}\n" +
- $"Entry Block: 0x{EntryBlock?.Address.ToString("X8") ?? "None"}\n" +
- $"Exit Blocks: {(ExitBlocks.Count > 0 ? string.Join(", ", ExitBlocks.Select(b => $"0x{b.Address:X8}")) : "None")}\n" +
- $"Total Blocks: {Blocks.Count}\n" +
- loopsInfo +
- $"{string.Join("\n", Blocks.Select(x => $"\t{x}"))}";
+ return $"{Address:X8}\n{string.Join("\n", Blocks)}";
}
}
\ No newline at end of file
diff --git a/X86Disassembler/Analysers/ControlFlowAnalyzer.cs b/X86Disassembler/Analysers/ControlFlowAnalyzer.cs
deleted file mode 100644
index 4b3b5c9..0000000
--- a/X86Disassembler/Analysers/ControlFlowAnalyzer.cs
+++ /dev/null
@@ -1,303 +0,0 @@
-using X86Disassembler.Analysers.DecompilerTypes;
-using X86Disassembler.X86;
-using X86Disassembler.X86.Operands;
-
-namespace X86Disassembler.Analysers;
-
-///
-/// Analyzes control flow structures in disassembled code
-///
-public class ControlFlowAnalyzer
-{
- ///
- /// The analyzer context
- ///
- private readonly AnalyzerContext _context;
-
- ///
- /// Creates a new control flow analyzer
- ///
- /// The analyzer context
- public ControlFlowAnalyzer(AnalyzerContext context)
- {
- _context = context;
- }
-
- ///
- /// Analyzes the control flow of a function to identify high-level structures
- ///
- /// The function to analyze
- public void AnalyzeControlFlow(Function function)
- {
- // First, identify if-else structures
- IdentifyIfElseStructures(function);
-
- // Then, identify switch statements
- IdentifySwitchStatements(function);
- }
-
- ///
- /// Identifies if-else structures in the control flow graph
- ///
- /// The function to analyze
- private void IdentifyIfElseStructures(Function function)
- {
- // Now analyze each block for conditional jumps
- foreach (var block in function.AsmFunction.Blocks)
- {
- // Get the last instruction in the block
- var lastInstruction = block.Instructions.LastOrDefault();
- if (lastInstruction == null) continue;
-
- // Check if the last instruction is a conditional jump
- if (lastInstruction.Type.IsConditionalJump())
- {
- // Get the jump target address
- ulong targetAddress = GetJumpTargetAddress(lastInstruction);
-
- // Find the target block
- InstructionBlock? targetBlock = null;
- foreach (var b in function.AsmFunction.Blocks)
- {
- if (b.Address == targetAddress)
- {
- targetBlock = b;
- break;
- }
- }
-
- if (targetBlock == null)
- {
- continue;
- }
-
- // Find the fall-through block (should be in the successors)
- InstructionBlock? fallThroughBlock = null;
- foreach (var successor in block.Successors)
- {
- if (successor != targetBlock)
- {
- fallThroughBlock = successor;
- break;
- }
- }
-
- if (fallThroughBlock == null)
- {
- continue;
- }
-
- // Create an if-else structure
- var ifElseStructure = new IfElseStructure
- {
- ConditionBlock = block,
- ThenBlock = targetBlock,
- ElseBlock = fallThroughBlock
- };
-
- // Store the if-else structure in the analysis context
- function.AsmFunction.Context.StoreAnalysisData(block.Address, "IfElseStructure", ifElseStructure);
- }
- }
-
- // Second pass: identify nested if-else structures
- foreach (var block in function.AsmFunction.Blocks)
- {
- var ifElseStructure = _context.GetAnalysisData(block.Address, "IfElseStructure");
- if (ifElseStructure != null)
- {
- // Check if the 'then' block contains another if-else structure
- var nestedThenIf = _context.GetAnalysisData(ifElseStructure.ThenBlock.Address, "IfElseStructure");
- if (nestedThenIf != null)
- {
- ifElseStructure.NestedThenStructure = nestedThenIf;
- }
-
- // Check if the 'else' block contains another if-else structure
- if (ifElseStructure.ElseBlock != null)
- {
- var nestedElseIf = _context.GetAnalysisData(ifElseStructure.ElseBlock.Address, "IfElseStructure");
- if (nestedElseIf != null)
- {
- ifElseStructure.NestedElseStructure = nestedElseIf;
- }
- }
- }
- }
- }
-
- ///
- /// Identifies switch statements in the control flow graph
- ///
- /// The function to analyze
- private void IdentifySwitchStatements(Function function)
- {
- // For each block in the function
- foreach (var block in function.AsmFunction.Blocks)
- {
- // Look for patterns that indicate a switch statement
- // Common patterns include:
- // 1. A series of compare and jump instructions
- // 2. An indirect jump through a jump table
-
- // For now, we'll focus on the first pattern (series of compares)
- if (IsPotentialSwitchHeader(block))
- {
- // This is a potential switch statement
- var switchStructure = new SwitchStructure
- {
- HeaderBlock = block,
- Cases = []
- };
-
- // Find the cases by analyzing the successors
- foreach (var successor in block.Successors)
- {
- // Each successor is a potential case
- switchStructure.Cases.Add(new SwitchCase
- {
- CaseBlock = successor,
- Value = 0 // We'd need more analysis to determine the actual value
- });
- }
-
- // Store the switch structure in the context
- _context.StoreAnalysisData(block.Address, "SwitchStructure", switchStructure);
- }
- }
- }
-
- ///
- /// Gets the target address of a jump instruction
- ///
- /// The jump instruction
- /// The target address of the jump
- private ulong GetJumpTargetAddress(Instruction instruction)
- {
- // Add debug output to see the instruction and its operands
-
- // For conditional jumps, the target address is the first operand
- if (instruction.StructuredOperands.Count > 0)
- {
- var operand = instruction.StructuredOperands[0];
-
- if (operand is ImmediateOperand immOp)
- {
- return (ulong)immOp.Value;
- }
- else if (operand is RelativeOffsetOperand relOp)
- {
- // For relative jumps, the target address is directly available in the operand
- // We need to convert from file offset to RVA by adding 0x1000 (the section offset)
- // This matches how the blocks are converted in BlockDisassembler.cs
- ulong rvaTargetAddress = relOp.TargetAddress + 0x1000;
- return rvaTargetAddress;
- }
- }
-
- // If we can't determine the target, return 0
- return 0;
- }
-
- ///
- /// Checks if the given block is a potential switch statement header
- ///
- /// The block to check
- /// True if the block is a potential switch header, false otherwise
- private bool IsPotentialSwitchHeader(InstructionBlock block)
- {
- // A switch header typically has multiple successors
- if (block.Successors.Count <= 2)
- {
- return false;
- }
-
- // Look for patterns that indicate a switch statement
- // For now, we'll just check if the block ends with an indirect jump
- if (block.Instructions.Count > 0)
- {
- var lastInstruction = block.Instructions[^1];
- if (lastInstruction.Type == InstructionType.Jmp &&
- lastInstruction.StructuredOperands.Count > 0 &&
- !(lastInstruction.StructuredOperands[0] is ImmediateOperand))
- {
- return true;
- }
- }
-
- return false;
- }
-
- ///
- /// Represents an if-else structure in the control flow graph
- ///
- public class IfElseStructure
- {
- ///
- /// The block containing the condition
- ///
- public InstructionBlock ConditionBlock { get; set; } = null!;
-
- ///
- /// The block representing the 'then' branch (taken when condition is true)
- ///
- public InstructionBlock ThenBlock { get; set; } = null!;
-
- ///
- /// The block representing the 'else' branch (taken when condition is false)
- ///
- public InstructionBlock? ElseBlock { get; set; }
-
- ///
- /// The block where both branches merge back together (if applicable)
- ///
- public InstructionBlock? MergeBlock { get; set; }
-
- ///
- /// Whether this is a complete if-else structure with a merge point
- ///
- public bool IsComplete { get; set; }
-
- ///
- /// Nested if-else structure in the 'then' branch (if any)
- ///
- public IfElseStructure? NestedThenStructure { get; set; }
-
- ///
- /// Nested if-else structure in the 'else' branch (if any)
- ///
- public IfElseStructure? NestedElseStructure { get; set; }
- }
-
- ///
- /// Represents a switch statement in the control flow graph
- ///
- public class SwitchStructure
- {
- ///
- /// The block containing the switch header
- ///
- public InstructionBlock HeaderBlock { get; set; } = null!;
-
- ///
- /// The cases of the switch statement
- ///
- public List Cases { get; set; } = [];
- }
-
- ///
- /// Represents a case in a switch statement
- ///
- public class SwitchCase
- {
- ///
- /// The value of the case
- ///
- public int Value { get; set; }
-
- ///
- /// The block containing the case code
- ///
- public InstructionBlock CaseBlock { get; set; } = null!;
- }
-}
diff --git a/X86Disassembler/Analysers/DataFlowAnalyzer.cs b/X86Disassembler/Analysers/DataFlowAnalyzer.cs
deleted file mode 100644
index 3f51c16..0000000
--- a/X86Disassembler/Analysers/DataFlowAnalyzer.cs
+++ /dev/null
@@ -1,384 +0,0 @@
-using X86Disassembler.X86;
-using X86Disassembler.X86.Operands;
-
-namespace X86Disassembler.Analysers;
-
-///
-/// Analyzes data flow through instructions to track register values
-///
-public class DataFlowAnalyzer
-{
- // Constants for analysis data keys
- private const string REGISTER_VALUE_KEY = "RegisterValue";
- private const string MEMORY_VALUE_KEY = "MemoryValue";
-
- ///
- /// Represents a known value for a register or memory location
- ///
- public class ValueInfo
- {
- ///
- /// The type of value (constant, register, memory, unknown)
- ///
- public enum ValueType
- {
- Unknown,
- Constant,
- Register,
- Memory
- }
-
- ///
- /// The type of this value
- ///
- public ValueType Type { get; set; } = ValueType.Unknown;
-
- ///
- /// The constant value (if Type is Constant)
- ///
- public ulong? ConstantValue { get; set; }
-
- ///
- /// The source register (if Type is Register)
- ///
- public RegisterIndex? SourceRegister { get; set; }
-
- ///
- /// The memory address or expression (if Type is Memory)
- ///
- public string? MemoryExpression { get; set; }
-
- ///
- /// The instruction that defined this value
- ///
- public Instruction? DefiningInstruction { get; set; }
-
- ///
- /// Returns a string representation of the value
- ///
- public override string ToString()
- {
- return Type switch
- {
- ValueType.Constant => $"0x{ConstantValue:X8}",
- ValueType.Register => $"{SourceRegister}",
- ValueType.Memory => $"[{MemoryExpression}]",
- _ => "unknown"
- };
- }
- }
-
- ///
- /// Analyzes data flow in the function and stores results in the analyzer context
- ///
- /// The analyzer context to store results in
- public void AnalyzeDataFlow(AnalyzerContext context)
- {
- // Process each block in order
- foreach (var block in context.Function.Blocks)
- {
- // Dictionary to track register values within this block
- Dictionary registerValues = new();
-
- // Process each instruction in the block
- foreach (var instruction in block.Instructions)
- {
- // Process the instruction based on its type
- ProcessInstruction(instruction, registerValues, context);
-
- // Store the current register state at this instruction's address
- StoreRegisterState(instruction.Address, registerValues, context);
- }
- }
- }
-
- ///
- /// Processes an instruction to update register values
- ///
- /// The instruction to process
- /// The current register values
- /// The analyzer context
- private void ProcessInstruction(Instruction instruction, Dictionary registerValues, AnalyzerContext context)
- {
- // Handle different instruction types
- switch (instruction.Type)
- {
- // MOV instructions
- case InstructionType.Mov:
- ProcessMovInstruction(instruction, registerValues);
- break;
-
- // XOR instructions
- case InstructionType.Xor:
- ProcessXorInstruction(instruction, registerValues);
- break;
-
- // ADD instructions
- case InstructionType.Add:
- ProcessAddInstruction(instruction, registerValues);
- break;
-
- // SUB instructions
- case InstructionType.Sub:
- ProcessSubInstruction(instruction, registerValues);
- break;
-
- // PUSH/POP instructions can affect register values
- case InstructionType.Pop:
- ProcessPopInstruction(instruction, registerValues);
- break;
-
- // Call instructions typically clobber certain registers
- case InstructionType.Call:
- ProcessCallInstruction(instruction, registerValues);
- break;
-
- // Other instructions that modify registers
- default:
- // For now, mark destination registers as unknown for unsupported instructions
- if (instruction.StructuredOperands.Count > 0 &&
- instruction.StructuredOperands[0] is RegisterOperand regOp)
- {
- registerValues[regOp.Register] = new ValueInfo
- {
- Type = ValueInfo.ValueType.Unknown,
- DefiningInstruction = instruction
- };
- }
-
- break;
- }
- }
-
- ///
- /// Processes a MOV instruction to update register values
- ///
- private void ProcessMovInstruction(Instruction instruction, Dictionary registerValues)
- {
- // Handle different MOV variants
- if (instruction.StructuredOperands.Count >= 2)
- {
- var dest = instruction.StructuredOperands[0];
- var src = instruction.StructuredOperands[1];
-
- // MOV reg, imm
- if (dest is RegisterOperand destReg && src is ImmediateOperand immSrc)
- {
- registerValues[destReg.Register] = new ValueInfo
- {
- Type = ValueInfo.ValueType.Constant,
- ConstantValue = immSrc.Value,
- DefiningInstruction = instruction
- };
- }
- // MOV reg, reg
- else if (dest is RegisterOperand destReg2 && src is RegisterOperand srcReg)
- {
- if (registerValues.TryGetValue(srcReg.Register, out var srcValue))
- {
- // Copy the source value
- registerValues[destReg2.Register] = new ValueInfo
- {
- Type = srcValue.Type,
- ConstantValue = srcValue.ConstantValue,
- SourceRegister = srcValue.SourceRegister,
- MemoryExpression = srcValue.MemoryExpression,
- DefiningInstruction = instruction
- };
- }
- else
- {
- // Source register value is unknown
- registerValues[destReg2.Register] = new ValueInfo
- {
- Type = ValueInfo.ValueType.Register,
- SourceRegister = srcReg.Register,
- DefiningInstruction = instruction
- };
- }
- }
- // MOV reg, [mem]
- else if (dest is RegisterOperand destReg3 && src is MemoryOperand memSrc)
- {
- registerValues[destReg3.Register] = new ValueInfo
- {
- Type = ValueInfo.ValueType.Memory,
- MemoryExpression = memSrc.ToString(),
- DefiningInstruction = instruction
- };
- }
- // MOV [mem], reg or MOV [mem], imm
- // These don't update register values, so we don't need to handle them here
- }
- }
-
- ///
- /// Processes an XOR instruction to update register values
- ///
- private void ProcessXorInstruction(Instruction instruction, Dictionary registerValues)
- {
- // Handle XOR reg, reg (often used for zeroing a register)
- if (instruction.StructuredOperands.Count >= 2)
- {
- var dest = instruction.StructuredOperands[0];
- var src = instruction.StructuredOperands[1];
-
- // XOR reg, same_reg (zeroing idiom)
- if (dest is RegisterOperand destReg && src is RegisterOperand srcReg &&
- destReg.Register == srcReg.Register)
- {
- registerValues[destReg.Register] = new ValueInfo
- {
- Type = ValueInfo.ValueType.Constant,
- ConstantValue = 0,
- DefiningInstruction = instruction
- };
- }
- // Other XOR operations make the result unknown
- else if (dest is RegisterOperand destReg2)
- {
- registerValues[destReg2.Register] = new ValueInfo
- {
- Type = ValueInfo.ValueType.Unknown,
- DefiningInstruction = instruction
- };
- }
- }
- }
-
- ///
- /// Processes an ADD instruction to update register values
- ///
- private void ProcessAddInstruction(Instruction instruction, Dictionary registerValues)
- {
- // Handle ADD reg, imm where we know the register value
- if (instruction.StructuredOperands.Count >= 2)
- {
- var dest = instruction.StructuredOperands[0];
- var src = instruction.StructuredOperands[1];
-
- // ADD reg, imm where reg is a known constant
- if (dest is RegisterOperand destReg && src is ImmediateOperand immSrc &&
- registerValues.TryGetValue(destReg.Register, out var destValue) &&
- destValue.Type == ValueInfo.ValueType.Constant &&
- destValue.ConstantValue.HasValue)
- {
- // Calculate the new constant value
- registerValues[destReg.Register] = new ValueInfo
- {
- Type = ValueInfo.ValueType.Constant,
- ConstantValue = (uint?) (destValue.ConstantValue.Value + immSrc.Value),
- DefiningInstruction = instruction
- };
- }
- // Other ADD operations make the result unknown
- else if (dest is RegisterOperand destReg2)
- {
- registerValues[destReg2.Register] = new ValueInfo
- {
- Type = ValueInfo.ValueType.Unknown,
- DefiningInstruction = instruction
- };
- }
- }
- }
-
- ///
- /// Processes a SUB instruction to update register values
- ///
- private void ProcessSubInstruction(Instruction instruction, Dictionary registerValues)
- {
- // Handle SUB reg, imm where we know the register value
- if (instruction.StructuredOperands.Count >= 2)
- {
- var dest = instruction.StructuredOperands[0];
- var src = instruction.StructuredOperands[1];
-
- // SUB reg, imm where reg is a known constant
- if (dest is RegisterOperand destReg && src is ImmediateOperand immSrc &&
- registerValues.TryGetValue(destReg.Register, out var destValue) &&
- destValue.Type == ValueInfo.ValueType.Constant &&
- destValue.ConstantValue.HasValue)
- {
- // Calculate the new constant value
- registerValues[destReg.Register] = new ValueInfo
- {
- Type = ValueInfo.ValueType.Constant,
- ConstantValue = (uint?) (destValue.ConstantValue.Value - immSrc.Value),
- DefiningInstruction = instruction
- };
- }
- // Other SUB operations make the result unknown
- else if (dest is RegisterOperand destReg2)
- {
- registerValues[destReg2.Register] = new ValueInfo
- {
- Type = ValueInfo.ValueType.Unknown,
- DefiningInstruction = instruction
- };
- }
- }
- }
-
- ///
- /// Processes a POP instruction to update register values
- ///
- private void ProcessPopInstruction(Instruction instruction, Dictionary registerValues)
- {
- // POP reg makes the register value unknown (comes from stack)
- if (instruction.StructuredOperands.Count >= 1 &&
- instruction.StructuredOperands[0] is RegisterOperand destReg)
- {
- registerValues[destReg.Register] = new ValueInfo
- {
- Type = ValueInfo.ValueType.Unknown,
- DefiningInstruction = instruction
- };
- }
- }
-
- ///
- /// Processes a CALL instruction to update register values
- ///
- private void ProcessCallInstruction(Instruction instruction, Dictionary registerValues)
- {
- // CALL instructions typically clobber EAX, ECX, and EDX in x86 calling conventions
- registerValues[RegisterIndex.A] = new ValueInfo
- {
- Type = ValueInfo.ValueType.Unknown,
- DefiningInstruction = instruction
- };
-
- registerValues[RegisterIndex.C] = new ValueInfo
- {
- Type = ValueInfo.ValueType.Unknown,
- DefiningInstruction = instruction
- };
-
- registerValues[RegisterIndex.D] = new ValueInfo
- {
- Type = ValueInfo.ValueType.Unknown,
- DefiningInstruction = instruction
- };
- }
-
- ///
- /// Stores the current register state at the given address
- ///
- private void StoreRegisterState(ulong address, Dictionary registerValues, AnalyzerContext context)
- {
- // Create a copy of the register values to store
- var registerValuesCopy = new Dictionary(registerValues);
-
- // Store in the context
- context.StoreAnalysisData(address, REGISTER_VALUE_KEY, registerValuesCopy);
- }
-
- ///
- /// Gets the register values at the given address
- ///
- public static Dictionary? GetRegisterValues(ulong address, AnalyzerContext context)
- {
- return context.GetAnalysisData>(address, REGISTER_VALUE_KEY);
- }
-}
\ No newline at end of file
diff --git a/X86Disassembler/Analysers/DecompilerEngine.cs b/X86Disassembler/Analysers/DecompilerEngine.cs
deleted file mode 100644
index f6c3e01..0000000
--- a/X86Disassembler/Analysers/DecompilerEngine.cs
+++ /dev/null
@@ -1,149 +0,0 @@
-using X86Disassembler.Analysers.DecompilerTypes;
-using X86Disassembler.PE;
-using X86Disassembler.X86;
-
-namespace X86Disassembler.Analysers;
-
-///
-/// Main engine for decompiling x86 code
-///
-public class DecompilerEngine
-{
- ///
- /// The PE file being analyzed
- ///
- private readonly PeFile _peFile;
-
- ///
- /// Dictionary of analyzed functions by address
- ///
- private readonly Dictionary _functions = [];
-
- ///
- /// Dictionary of exported function names by address
- ///
- private readonly Dictionary _exportedFunctions = [];
-
- ///
- /// Creates a new decompiler engine for the specified PE file
- ///
- /// The PE file to decompile
- public DecompilerEngine(PeFile peFile)
- {
- _peFile = peFile;
-
- // Initialize the exported functions dictionary
- foreach (var export in peFile.ExportedFunctions)
- {
- _exportedFunctions[export.AddressRva] = export.Name;
- }
- }
-
- ///
- /// Decompiles a function at the specified address
- ///
- /// The address of the function to decompile
- /// The decompiled function
- public Function DecompileFunction(ulong address)
- {
- // Check if we've already analyzed this function
- if (_functions.TryGetValue(address, out var existingFunction))
- {
- return existingFunction;
- }
-
- // Find the code section containing this address
- var codeSection = _peFile.SectionHeaders.Find(s =>
- s.ContainsCode() &&
- address >= s.VirtualAddress &&
- address < s.VirtualAddress + s.VirtualSize);
-
- if (codeSection == null)
- {
- throw new InvalidOperationException($"No code section found containing address 0x{address:X8}");
- }
-
- // Get the section data
- int sectionIndex = _peFile.SectionHeaders.IndexOf(codeSection);
- byte[] codeBytes = _peFile.GetSectionData(sectionIndex);
-
- // Create a disassembler for the code section
- var disassembler = new BlockDisassembler(codeBytes, codeSection.VirtualAddress);
-
- // Disassemble the function
- var asmFunction = disassembler.DisassembleFromAddress((uint)address);
-
- // Create an analyzer context
- var context = new AnalyzerContext(asmFunction);
-
- // Run the analyzers
- var loopAnalyzer = new LoopAnalyzer();
- loopAnalyzer.AnalyzeLoops(context);
-
- var dataFlowAnalyzer = new DataFlowAnalyzer();
- dataFlowAnalyzer.AnalyzeDataFlow(context);
-
- // Get the function name from exports if available
- string functionName = _exportedFunctions.TryGetValue(address, out var name)
- ? name
- : $"func_{address:X8}";
-
- // Analyze the function
- var functionAnalyzer = new FunctionAnalyzer(context);
- var function = functionAnalyzer.AnalyzeFunction(address, functionName);
-
- // Analyze control flow structures
- var controlFlowAnalyzer = new ControlFlowAnalyzer(context);
- controlFlowAnalyzer.AnalyzeControlFlow(function);
-
-
-
- // Store the function in our cache
- _functions[address] = function;
-
- return function;
- }
-
- ///
- /// Generates C-like pseudocode for a decompiled function
- ///
- /// The function to generate pseudocode for
- /// The generated pseudocode
- public string GeneratePseudocode(Function function)
- {
- // Create a pseudocode generator
- var generator = new PseudocodeGenerator();
-
- // Generate the pseudocode
- return generator.GeneratePseudocode(function);
- }
-
- ///
- /// Decompiles all exported functions in the PE file
- ///
- /// A dictionary of decompiled functions by address
- public Dictionary DecompileAllExportedFunctions()
- {
- foreach (var export in _peFile.ExportedFunctions)
- {
- // Skip forwarded exports
- if (export.IsForwarder)
- {
- continue;
- }
-
- try
- {
- DecompileFunction(export.AddressRva);
- }
- catch (Exception ex)
- {
- Console.WriteLine($"Error decompiling function {export.Name} at 0x{export.AddressRva:X8}: {ex.Message}");
- }
- }
-
- return _functions;
- }
-}
-
-
diff --git a/X86Disassembler/Analysers/DecompilerTypes/CallingConvention.cs b/X86Disassembler/Analysers/DecompilerTypes/CallingConvention.cs
deleted file mode 100644
index eed8bd8..0000000
--- a/X86Disassembler/Analysers/DecompilerTypes/CallingConvention.cs
+++ /dev/null
@@ -1,58 +0,0 @@
-namespace X86Disassembler.Analysers.DecompilerTypes;
-
-///
-/// Represents a calling convention used by a function
-///
-public enum CallingConvention
-{
- ///
- /// C declaration calling convention (caller cleans the stack)
- /// Parameters are pushed right-to-left
- /// EAX, ECX, EDX are caller-saved
- /// EBX, ESI, EDI, EBP are callee-saved
- /// Return value in EAX (or EDX:EAX for 64-bit values)
- ///
- Cdecl,
-
- ///
- /// Standard calling convention (callee cleans the stack)
- /// Parameters are pushed right-to-left
- /// EAX, ECX, EDX are caller-saved
- /// EBX, ESI, EDI, EBP are callee-saved
- /// Return value in EAX (or EDX:EAX for 64-bit values)
- ///
- Stdcall,
-
- ///
- /// Fast calling convention
- /// First two parameters in ECX and EDX, rest on stack right-to-left
- /// EAX, ECX, EDX are caller-saved
- /// EBX, ESI, EDI, EBP are callee-saved
- /// Return value in EAX
- /// Callee cleans the stack
- ///
- Fastcall,
-
- ///
- /// This calling convention (C++ member functions)
- /// 'this' pointer in ECX, other parameters pushed right-to-left
- /// EAX, ECX, EDX are caller-saved
- /// EBX, ESI, EDI, EBP are callee-saved
- /// Return value in EAX
- /// Caller cleans the stack
- ///
- Thiscall,
-
- ///
- /// Microsoft vectorcall convention
- /// First six parameters in registers (XMM0-XMM5 for floating point, ECX, EDX, R8, R9 for integers)
- /// Additional parameters pushed right-to-left
- /// Return value in EAX or XMM0
- ///
- Vectorcall,
-
- ///
- /// Unknown calling convention
- ///
- Unknown
-}
diff --git a/X86Disassembler/Analysers/DecompilerTypes/DataType.cs b/X86Disassembler/Analysers/DecompilerTypes/DataType.cs
deleted file mode 100644
index 85b0d90..0000000
--- a/X86Disassembler/Analysers/DecompilerTypes/DataType.cs
+++ /dev/null
@@ -1,190 +0,0 @@
-namespace X86Disassembler.Analysers.DecompilerTypes;
-
-///
-/// Represents a data type in decompiled code
-///
-public class DataType
-{
- ///
- /// The category of the data type
- ///
- public enum TypeCategory
- {
- ///
- /// Unknown type
- ///
- Unknown,
-
- ///
- /// Void type (no value)
- ///
- Void,
-
- ///
- /// Integer type
- ///
- Integer,
-
- ///
- /// Floating point type
- ///
- Float,
-
- ///
- /// Pointer type
- ///
- Pointer,
-
- ///
- /// Structure type
- ///
- Struct,
-
- ///
- /// Array type
- ///
- Array,
-
- ///
- /// Function type
- ///
- Function
- }
-
- ///
- /// The name of the type
- ///
- public string Name { get; set; } = string.Empty;
-
- ///
- /// The category of the type
- ///
- public TypeCategory Category { get; set; }
-
- ///
- /// The size of the type in bytes
- ///
- public int Size { get; set; }
-
- ///
- /// Whether the type is signed (for integer types)
- ///
- public bool IsSigned { get; set; }
-
- ///
- /// The pointed-to type (for pointer types)
- ///
- public DataType? PointedType { get; set; }
-
- ///
- /// The element type (for array types)
- ///
- public DataType? ElementType { get; set; }
-
- ///
- /// The number of elements (for array types)
- ///
- public int? ElementCount { get; set; }
-
- ///
- /// The fields of the structure (for struct types)
- ///
- public List Fields { get; set; } = [];
-
- ///
- /// Creates a new data type with the specified name and category
- ///
- /// The name of the type
- /// The category of the type
- /// The size of the type in bytes
- public DataType(string name, TypeCategory category, int size)
- {
- Name = name;
- Category = category;
- Size = size;
- }
-
- ///
- /// Returns a string representation of the type
- ///
- public override string ToString()
- {
- return Name;
- }
-
- ///
- /// Creates a pointer type to the specified type
- ///
- /// The type being pointed to
- /// A new pointer type
- public static DataType CreatePointerType(DataType pointedType)
- {
- return new DataType($"{pointedType.Name}*", TypeCategory.Pointer, 4)
- {
- PointedType = pointedType
- };
- }
-
- ///
- /// Creates an array type of the specified element type and count
- ///
- /// The type of the array elements
- /// The number of elements in the array
- /// A new array type
- public static DataType CreateArrayType(DataType elementType, int count)
- {
- return new DataType($"{elementType.Name}[{count}]", TypeCategory.Array, elementType.Size * count)
- {
- ElementType = elementType,
- ElementCount = count
- };
- }
-
- ///
- /// Common predefined types
- ///
- public static readonly DataType Unknown = new DataType("unknown", TypeCategory.Unknown, 0);
- public static readonly DataType Void = new DataType("void", TypeCategory.Void, 0);
- public static readonly DataType Char = new DataType("char", TypeCategory.Integer, 1) { IsSigned = true };
- public static readonly DataType UChar = new DataType("unsigned char", TypeCategory.Integer, 1);
- public static readonly DataType Short = new DataType("short", TypeCategory.Integer, 2) { IsSigned = true };
- public static readonly DataType UShort = new DataType("unsigned short", TypeCategory.Integer, 2);
- public static readonly DataType Int = new DataType("int", TypeCategory.Integer, 4) { IsSigned = true };
- public static readonly DataType UInt = new DataType("unsigned int", TypeCategory.Integer, 4);
- public static readonly DataType Float = new DataType("float", TypeCategory.Float, 4);
- public static readonly DataType Double = new DataType("double", TypeCategory.Float, 8);
-}
-
-///
-/// Represents a field in a structure
-///
-public class StructField
-{
- ///
- /// The name of the field
- ///
- public string Name { get; set; } = string.Empty;
-
- ///
- /// The type of the field
- ///
- public DataType Type { get; set; } = DataType.Unknown;
-
- ///
- /// The offset of the field within the structure
- ///
- public int Offset { get; set; }
-
- ///
- /// Creates a new structure field
- ///
- /// The name of the field
- /// The type of the field
- /// The offset of the field within the structure
- public StructField(string name, DataType type, int offset)
- {
- Name = name;
- Type = type;
- Offset = offset;
- }
-}
diff --git a/X86Disassembler/Analysers/DecompilerTypes/Function.cs b/X86Disassembler/Analysers/DecompilerTypes/Function.cs
deleted file mode 100644
index 29af312..0000000
--- a/X86Disassembler/Analysers/DecompilerTypes/Function.cs
+++ /dev/null
@@ -1,98 +0,0 @@
-using X86Disassembler.X86;
-using X86Disassembler.X86.Operands;
-
-namespace X86Disassembler.Analysers.DecompilerTypes;
-
-///
-/// Represents a function in decompiled code
-///
-public class Function
-{
- ///
- /// The name of the function
- ///
- public string Name { get; set; } = string.Empty;
-
- ///
- /// The address of the function
- ///
- public ulong Address { get; set; }
-
- ///
- /// The return type of the function
- ///
- public DataType ReturnType { get; set; } = DataType.Void;
-
- ///
- /// The parameters of the function
- ///
- public List Parameters { get; set; } = [];
-
- ///
- /// Local variables in this function
- ///
- public List LocalVariables { get; } = [];
-
- ///
- /// Variables stored in registers
- ///
- public List RegisterVariables { get; } = [];
-
- ///
- /// The calling convention used by the function
- ///
- public CallingConvention CallingConvention { get; set; } = CallingConvention.Cdecl;
-
- ///
- /// The assembly function representation
- ///
- public AsmFunction AsmFunction { get; set; }
-
- ///
- /// Creates a new function with the specified name and address
- ///
- /// The name of the function
- /// The address of the function
- /// The assembly function representation
- public Function(string name, ulong address, AsmFunction asmFunction)
- {
- Name = name;
- Address = address;
- AsmFunction = asmFunction;
- }
-
- ///
- /// Analyzes the function to identify variables
- ///
- public void AnalyzeVariables()
- {
- // Create a variable analyzer
- var variableAnalyzer = new VariableAnalyzer(AsmFunction.Context);
-
- // Analyze stack variables
- variableAnalyzer.AnalyzeStackVariables(this);
- }
-
-
-
-
-
-
-
- ///
- /// Returns a string representation of the function signature
- ///
- public string GetSignature()
- {
- string paramList = string.Join(", ", Parameters.Select(p => $"{p.Type} {p.Name}"));
- return $"{ReturnType} {Name}({paramList})";
- }
-
- ///
- /// Returns a string representation of the function
- ///
- public override string ToString()
- {
- return GetSignature();
- }
-}
diff --git a/X86Disassembler/Analysers/DecompilerTypes/Variable.cs b/X86Disassembler/Analysers/DecompilerTypes/Variable.cs
deleted file mode 100644
index c428824..0000000
--- a/X86Disassembler/Analysers/DecompilerTypes/Variable.cs
+++ /dev/null
@@ -1,102 +0,0 @@
-namespace X86Disassembler.Analysers.DecompilerTypes;
-
-///
-/// Represents a variable in decompiled code
-///
-public class Variable
-{
- ///
- /// The type of storage for a variable
- ///
- public enum StorageType
- {
- ///
- /// Variable stored on the stack (local variable)
- ///
- Stack,
-
- ///
- /// Variable stored in a register
- ///
- Register,
-
- ///
- /// Variable stored in global memory
- ///
- Global,
-
- ///
- /// Function parameter passed on the stack
- ///
- Parameter,
-
- ///
- /// Function parameter passed in a register
- ///
- RegisterParameter
- }
-
- ///
- /// The name of the variable
- ///
- public string Name { get; set; } = string.Empty;
-
- ///
- /// The type of the variable
- ///
- public DataType Type { get; set; } = DataType.Unknown;
-
- ///
- /// The storage location of the variable
- ///
- public StorageType Storage { get; set; }
-
- ///
- /// The offset from the base pointer (for stack variables)
- ///
- public int? StackOffset { get; set; }
-
- ///
- /// The register that holds this variable (for register variables)
- ///
- public X86.RegisterIndex? Register { get; set; }
-
- ///
- /// The memory address (for global variables)
- ///
- public ulong? Address { get; set; }
-
- ///
- /// The size of the variable in bytes
- ///
- public int Size { get; set; }
-
- ///
- /// Whether this variable is a function parameter
- ///
- public bool IsParameter { get; set; }
-
- ///
- /// The parameter index (if this is a parameter)
- ///
- public int? ParameterIndex { get; set; }
-
- ///
- /// Creates a new variable with the specified name and type
- ///
- /// The name of the variable
- /// The type of the variable
- public Variable(string name, DataType type)
- {
- Name = name;
- Type = type;
- }
-
- ///
- /// Returns a string representation of the variable
- ///
- public override string ToString()
- {
- return $"{Type} {Name}";
- }
-}
diff --git a/X86Disassembler/Analysers/FileAbsoluteAddress.cs b/X86Disassembler/Analysers/FileAbsoluteAddress.cs
new file mode 100644
index 0000000..c3fc01c
--- /dev/null
+++ b/X86Disassembler/Analysers/FileAbsoluteAddress.cs
@@ -0,0 +1,56 @@
+namespace X86Disassembler.Analysers;
+
+/// <summary>Base type for addresses that pair a raw value with a PE.ImageBase.</summary>
+public abstract class Address
+{
+    /// <summary>The actual value of the address, not specifically typed.</summary>
+    protected readonly ulong Value;
+
+    /// <summary>PE.ImageBase from which this address is constructed</summary>
+    protected readonly ulong ImageBase;
+
+    /// <summary>Stores both numbers as given; no validation is performed.</summary>
+    protected Address(ulong value, ulong imageBase) => (Value, ImageBase) = (value, imageBase);
+}
+
+/// <summary>
+/// Absolute address in the PE file
+/// </summary>
+public class FileAbsoluteAddress(ulong value, ulong imageBase) : Address(value, imageBase)
+{
+    /// <summary>Returns the raw numeric value held by this address.</summary>
+    public ulong GetValue() => Value;
+
+    /// <summary>
+    /// Converts to the image-base form by adding ImageBase to the value.
+    /// </summary>
+    /// <returns>A new <see cref="VirtualAddress"/> carrying the same ImageBase.</returns>
+    public virtual VirtualAddress AsImageBaseAddress() => new(Value + ImageBase, ImageBase);
+
+    /// <summary>
+    /// Returns this instance as-is; <see cref="VirtualAddress"/> overrides this
+    /// to perform the reverse conversion.
+    /// </summary>
+    public virtual FileAbsoluteAddress AsFileAbsolute() => this;
+}
+
+/// <summary>
+/// Address from PE.ImageBase (the value is expected to already include the image base).
+/// </summary>
+public class VirtualAddress : FileAbsoluteAddress
+{
+    /// <summary>Creates a virtual address from its raw value and the PE.ImageBase.</summary>
+    public VirtualAddress(ulong value, ulong imageBase) : base(value, imageBase)
+    {
+    }
+
+    /// <summary>Returns this instance unchanged; no conversion is needed.</summary>
+    public override VirtualAddress AsImageBaseAddress() => this;
+
+    /// <summary>Converts back to a file address by subtracting ImageBase from the value.</summary>
+    /// <exception cref="System.OverflowException">The value is smaller than ImageBase.</exception>
+    public override FileAbsoluteAddress AsFileAbsolute() =>
+        // checked: a value below ImageBase must fail loudly instead of wrapping to a bogus address.
+        new FileAbsoluteAddress(checked(Value - ImageBase), ImageBase);
+}
+
diff --git a/X86Disassembler/Analysers/FunctionAnalyzer.cs b/X86Disassembler/Analysers/FunctionAnalyzer.cs
deleted file mode 100644
index fac2e1d..0000000
--- a/X86Disassembler/Analysers/FunctionAnalyzer.cs
+++ /dev/null
@@ -1,132 +0,0 @@
-using X86Disassembler.Analysers.DecompilerTypes;
-using X86Disassembler.X86;
-using X86Disassembler.X86.Operands;
-
-namespace X86Disassembler.Analysers;
-
-///
-/// Analyzes disassembled functions to identify variables, parameters, and control flow structures
-///
-public class FunctionAnalyzer
-{
- ///
- /// The analyzer context
- ///
- private readonly AnalyzerContext _context;
-
- ///
- /// Creates a new function analyzer
- ///
- /// The analyzer context
- public FunctionAnalyzer(AnalyzerContext context)
- {
- _context = context;
- }
-
- ///
- /// Analyzes a function at the specified address
- ///
- /// The address of the function
- /// The name of the function (if known)
- /// The analyzed function
- public Function AnalyzeFunction(ulong address, string name = "")
- {
- // If no name is provided, generate one based on the address
- if (string.IsNullOrEmpty(name))
- {
- name = $"func_{address:X8}";
- }
-
- // Create a function object
- var function = new Function(name, address, _context.Function)
- {
- ReturnType = DataType.Unknown // Default to unknown return type
- };
-
- // Create a variable analyzer and analyze variables
- var variableAnalyzer = new VariableAnalyzer(_context);
- variableAnalyzer.AnalyzeStackVariables(function);
-
- // Determine the calling convention
- DetermineCallingConvention(function);
-
- // Infer parameter and return types
- InferTypes(function);
-
- return function;
- }
-
- ///
- /// Determines the calling convention of a function based on its behavior
- ///
- /// The function to analyze
- private void DetermineCallingConvention(Function function)
- {
- // By default, we'll assume cdecl
- function.CallingConvention = CallingConvention.Cdecl;
-
- // Get the exit blocks (blocks with ret instructions)
- var exitBlocks = function.AsmFunction.Blocks.Where(b =>
- b.Instructions.Count > 0 &&
- b.Instructions.Last().Type == InstructionType.Ret).ToList();
-
- // Check if the function cleans up its own stack
- bool cleansOwnStack = false;
-
- // Look for ret instructions with an immediate operand
- foreach (var block in function.AsmFunction.Blocks)
- {
- var lastInstruction = block.Instructions.LastOrDefault();
- if (lastInstruction != null && lastInstruction.Type == InstructionType.Ret)
- {
- // If the ret instruction has an immediate operand, it's cleaning its own stack
- if (lastInstruction.StructuredOperands.Count > 0 &&
- lastInstruction.StructuredOperands[0] is ImmediateOperand immOp &&
- immOp.Value > 0)
- {
- cleansOwnStack = true;
- break;
- }
- }
- }
-
- // If the function cleans its own stack, it's likely stdcall
- if (cleansOwnStack)
- {
- function.CallingConvention = CallingConvention.Stdcall;
-
- // Check for thiscall (ECX used for this pointer)
- // This would require more sophisticated analysis of register usage
- }
-
- // Check for fastcall (first two parameters in ECX and EDX)
- // This would require more sophisticated analysis of register usage
- }
-
- ///
- /// Infers types for parameters and local variables based on their usage
- ///
- /// The function to analyze
- private void InferTypes(Function function)
- {
- // This is a complex analysis that would require tracking how variables are used
- // For now, we'll just set default types
-
- // Set return type based on register usage
- function.ReturnType = DataType.Int; // Default to int
-
- // For each parameter, try to infer its type
- foreach (var param in function.Parameters)
- {
- // Default to int for now
- param.Type = DataType.Int;
- }
-
- // For each local variable, try to infer its type
- foreach (var localVar in function.LocalVariables)
- {
- // Default to int for now
- localVar.Type = DataType.Int;
- }
- }
-}
diff --git a/X86Disassembler/Analysers/LoopAnalyzer.cs b/X86Disassembler/Analysers/LoopAnalyzer.cs
deleted file mode 100644
index f68624e..0000000
--- a/X86Disassembler/Analysers/LoopAnalyzer.cs
+++ /dev/null
@@ -1,120 +0,0 @@
-namespace X86Disassembler.Analysers;
-
-///
-/// Analyzes the control flow graph to identify loops
-///
-public class LoopAnalyzer
-{
- ///
- /// Identifies loops in the given function and stores them in the analyzer context
- ///
- /// The analyzer context to store results in
- public void AnalyzeLoops(AnalyzerContext context)
- {
- // A back edge is an edge from a node to one of its dominators
- // For our simplified approach, we'll identify back edges as edges that point to blocks
- // with a lower address (potential loop headers)
- foreach (var block in context.Function.Blocks)
- {
- foreach (var successor in block.Successors)
- {
- // If the successor has a lower address than the current block,
- // it's potentially a back edge forming a loop
- if (successor.Address < block.Address)
- {
- // Create a new loop with the identified back edge
- var loop = new AnalyzerContext.Loop
- {
- Header = successor,
- BackEdge = (block, successor)
- };
-
- // Find all blocks in the loop using a breadth-first search
- FindLoopBlocks(loop);
-
- // Find the exit blocks of the loop
- FindLoopExits(loop);
-
- // Store the loop in the context
- context.LoopsByHeaderAddress[successor.Address] = loop;
-
- // Update the blocks-to-loops mapping
- foreach (var loopBlock in loop.Blocks)
- {
- if (!context.LoopsByBlockAddress.TryGetValue(loopBlock.Address, out var loops))
- {
- loops = [];
- context.LoopsByBlockAddress[loopBlock.Address] = loops;
- }
-
- loops.Add(loop);
- }
- }
- }
- }
- }
-
- ///
- /// Finds all blocks that are part of the loop
- ///
- /// The loop to analyze
- private void FindLoopBlocks(AnalyzerContext.Loop loop)
- {
- // Start with the header block
- loop.Blocks.Add(loop.Header);
-
- // Use a queue for breadth-first search
- Queue queue = new Queue();
- queue.Enqueue(loop.BackEdge.From); // Start from the back edge source
-
- // Keep track of visited blocks to avoid cycles
- HashSet visited = new HashSet { loop.Header.Address };
-
- while (queue.Count > 0)
- {
- var block = queue.Dequeue();
-
- // If we've already processed this block, skip it
- if (!visited.Add(block.Address))
- {
- continue;
- }
-
- // Add the block to the loop
- loop.Blocks.Add(block);
-
- // Add all predecessors to the queue (except those that would take us outside the loop)
- foreach (var predecessor in block.Predecessors)
- {
- // Skip the header's predecessors that aren't in the loop already
- // (to avoid including blocks outside the loop)
- if (block == loop.Header && !loop.Blocks.Contains(predecessor) && predecessor != loop.BackEdge.From)
- {
- continue;
- }
-
- queue.Enqueue(predecessor);
- }
- }
- }
-
- ///
- /// Finds all exit blocks of the loop (blocks that have successors outside the loop)
- ///
- /// The loop to analyze
- private void FindLoopExits(AnalyzerContext.Loop loop)
- {
- foreach (var block in loop.Blocks)
- {
- foreach (var successor in block.Successors)
- {
- // If the successor is not part of the loop, this block is an exit
- if (!loop.Blocks.Contains(successor))
- {
- loop.ExitBlocks.Add(block);
- break; // Once we've identified this block as an exit, we can stop checking its successors
- }
- }
- }
- }
-}
diff --git a/X86Disassembler/Analysers/PseudocodeGenerator.cs b/X86Disassembler/Analysers/PseudocodeGenerator.cs
deleted file mode 100644
index 9e9b606..0000000
--- a/X86Disassembler/Analysers/PseudocodeGenerator.cs
+++ /dev/null
@@ -1,862 +0,0 @@
-using System.Text;
-using X86Disassembler.Analysers.DecompilerTypes;
-using X86Disassembler.X86;
-using X86Disassembler.X86.Operands;
-
-namespace X86Disassembler.Analysers;
-
-///
-/// Generates C-like pseudocode from decompiled functions
-///
-public class PseudocodeGenerator
-{
- ///
- /// Generates pseudocode for a decompiled function
- ///
- /// The function to generate pseudocode for
- /// The generated pseudocode
- public string GeneratePseudocode(Function function)
- {
- var result = new StringBuilder();
-
- // Add function signature
- result.AppendLine($"{function.ReturnType} {function.Name}({string.Join(", ", function.Parameters.Select(p => $"{p.Type} {p.Name}"))})")
- .AppendLine("{");
-
- // Add local variable declarations
- foreach (var localVar in function.LocalVariables)
- {
- result.AppendLine($" {localVar.Type} {localVar.Name}; // Stack offset: {localVar.StackOffset}");
- }
-
- // Add register variable declarations
- foreach (var regVar in function.RegisterVariables)
- {
- result.AppendLine($" {regVar.Type} {regVar.Name}; // Register: {RegisterMapper.GetRegisterName(regVar.Register!.Value, 32)}");
- }
-
- if (function.LocalVariables.Count > 0 || function.RegisterVariables.Count > 0)
- {
- result.AppendLine();
- }
-
- // Generate the function body using control flow analysis
- GenerateFunctionBody(function, result, 1);
-
- // Add a return statement
- result.AppendLine()
- .AppendLine(" return 0; // Placeholder return value")
- .AppendLine("}");
-
- return result.ToString();
- }
-
- ///
- /// Generates the body of the function using control flow analysis
- ///
- /// The function to generate code for
- /// The string builder to append to
- /// The current indentation level
- private void GenerateFunctionBody(Function function, StringBuilder result, int indentLevel)
- {
- // Try to find the entry block
- var entryBlock = function.AsmFunction.EntryBlock;
-
- // If the entry block is not found, try to find a block with an address that matches the function address minus the base address
- if (entryBlock == null && function.AsmFunction.Blocks.Count > 0)
- {
- // Get the first block as a fallback
- entryBlock = function.AsmFunction.Blocks[0];
-
- // Log a warning but continue with the first block
- result.AppendLine($"{new string(' ', indentLevel * 4)}// Warning: Entry block not found at address 0x{function.Address:X8}, using first block at 0x{entryBlock.Address:X8}");
- }
- else if (entryBlock == null)
- {
- result.AppendLine($"{new string(' ', indentLevel * 4)}// Function body could not be decompiled - no blocks found");
- return;
- }
-
- // Process blocks in order, starting from the entry block
- var processedBlocks = new HashSet();
- GenerateBlockCode(function, entryBlock, result, indentLevel, processedBlocks);
- }
-
- ///
- /// Generates code for a basic block and its successors
- ///
- /// The function containing the block
- /// The block to generate code for
- /// The string builder to append to
- /// The current indentation level
- /// Set of blocks that have already been processed
- private void GenerateBlockCode(Function function, InstructionBlock block, StringBuilder result, int indentLevel, HashSet processedBlocks)
- {
- // Check if we've already processed this block
- if (processedBlocks.Contains(block.Address))
- {
- return;
- }
-
- // Mark this block as processed
- processedBlocks.Add(block.Address);
-
- // Check if this block is part of a control flow structure
- var context = function.AsmFunction.Context;
-
- // Check for if-else structure
- var ifElseStructure = context.GetAnalysisData(block.Address, "IfElseStructure");
- if (ifElseStructure != null && ifElseStructure.ConditionBlock.Address == block.Address)
- {
- // This block is the condition of an if-else structure
- GenerateIfElseCode(function, ifElseStructure, result, indentLevel, processedBlocks);
- return;
- }
-
- // Check for switch structure
- var switchStructure = context.GetAnalysisData(block.Address, "SwitchStructure");
- if (switchStructure != null && switchStructure.HeaderBlock.Address == block.Address)
- {
- // This block is the header of a switch structure
- GenerateSwitchCode(function, switchStructure, result, indentLevel, processedBlocks);
- return;
- }
-
- // Check if this block is part of a loop
- var loops = context.LoopsByBlockAddress.TryGetValue(block.Address, out var blockLoops) ? blockLoops : null;
- if (loops != null && loops.Count > 0)
- {
- // Get the innermost loop
- var loop = loops[0];
-
- // Check if this is the loop header
- if (loop.Header.Address == block.Address)
- {
- // This block is the header of a loop
- GenerateLoopCode(function, loop, result, indentLevel, processedBlocks);
- return;
- }
- }
-
- // If we get here, this is a regular block
- GenerateRegularBlockCode(function, block, result, indentLevel, processedBlocks);
- }
-
- ///
- /// Generates code for a regular basic block
- ///
- /// The function containing the block
- /// The block to generate code for
- /// The string builder to append to
- /// The current indentation level
- /// Set of blocks that have already been processed
- private void GenerateRegularBlockCode(Function function, InstructionBlock block, StringBuilder result, int indentLevel, HashSet processedBlocks)
- {
- // Add a comment with the block address
- result.AppendLine($"{new string(' ', indentLevel * 4)}// Block at 0x{block.Address:X8}");
-
- // Check if this block ends with a conditional jump
- bool hasConditionalJump = block.Instructions.Count > 0 &&
- IsConditionalJump(block.Instructions[^1].Type);
-
- // Add debug info about conditional jumps
- if (hasConditionalJump)
- {
- var jumpInstruction = block.Instructions[^1];
- result.AppendLine($"{new string(' ', indentLevel * 4)}// DEBUG: Conditional jump {jumpInstruction} detected");
-
- // Get the jump target address
- ulong targetAddress = GetJumpTargetAddress(jumpInstruction);
- result.AppendLine($"{new string(' ', indentLevel * 4)}// DEBUG: Jump target: 0x{targetAddress:X8}");
-
- // Check if we can find a comparison instruction before the jump
- Instruction? comparisonInstruction = null;
- for (int i = block.Instructions.Count - 2; i >= 0 && i >= block.Instructions.Count - 5; i--)
- {
- var instruction = block.Instructions[i];
- if (instruction.Type == InstructionType.Cmp || instruction.Type == InstructionType.Test)
- {
- comparisonInstruction = instruction;
- break;
- }
- }
-
- if (comparisonInstruction != null)
- {
- result.AppendLine($"{new string(' ', indentLevel * 4)}// DEBUG: Found comparison: {comparisonInstruction}");
- }
- else
- {
- result.AppendLine($"{new string(' ', indentLevel * 4)}// DEBUG: No comparison instruction found");
- }
- }
-
- // If this block has a conditional jump but wasn't detected as an if-else structure,
- // we'll create an inline if statement for better readability
- if (hasConditionalJump && block.Successors.Count == 2)
- {
- // Get the last instruction (conditional jump)
- var jumpInstruction = block.Instructions[^1];
-
- // Generate condition based on the jump type
- string condition = GenerateConditionFromJump(jumpInstruction);
-
- // Generate code for all instructions except the last one (the jump)
- for (int i = 0; i < block.Instructions.Count - 1; i++)
- {
- var instruction = block.Instructions[i];
-
- // Skip prologue/epilogue instructions
- if (IsPrologueOrEpilogueInstruction(instruction))
- {
- continue;
- }
-
- // Generate pseudocode for this instruction
- var pseudocode = GenerateInstructionPseudocode(function, instruction);
- if (!string.IsNullOrEmpty(pseudocode))
- {
- result.AppendLine($"{new string(' ', indentLevel * 4)}{pseudocode}");
- }
- else
- {
- // If we couldn't generate pseudocode, add the instruction as a comment
- result.AppendLine($"{new string(' ', indentLevel * 4)}/* {instruction} */;");
- }
- }
-
- // Generate the if statement
- result.AppendLine($"{new string(' ', indentLevel * 4)}if ({condition})");
- result.AppendLine($"{new string(' ', indentLevel * 4)}{{");
-
- // Find the target block (true branch)
- var targetAddress = GetJumpTargetAddress(jumpInstruction);
- var targetBlock = block.Successors.FirstOrDefault(s => s.Address == targetAddress);
-
- if (targetBlock != null)
- {
- // Generate code for the target block
- GenerateBlockCode(function, targetBlock, result, indentLevel + 1, processedBlocks);
- }
-
- result.AppendLine($"{new string(' ', indentLevel * 4)}}}");
-
- // Find the fallthrough block (false branch)
- var fallthroughBlock = block.Successors.FirstOrDefault(s => s.Address != targetAddress);
-
- if (fallthroughBlock != null && !processedBlocks.Contains(fallthroughBlock.Address))
- {
- // Generate code for the fallthrough block
- GenerateBlockCode(function, fallthroughBlock, result, indentLevel, processedBlocks);
- }
- }
- else
- {
- // Regular block processing
- // Generate code for each instruction in the block
- foreach (var instruction in block.Instructions)
- {
- // Skip prologue/epilogue instructions
- if (IsPrologueOrEpilogueInstruction(instruction))
- {
- continue;
- }
-
- // Generate pseudocode for this instruction
- var pseudocode = GenerateInstructionPseudocode(function, instruction);
- if (!string.IsNullOrEmpty(pseudocode))
- {
- result.AppendLine($"{new string(' ', indentLevel * 4)}{pseudocode}");
- }
- else
- {
- // If we couldn't generate pseudocode, add the instruction as a comment
- result.AppendLine($"{new string(' ', indentLevel * 4)}/* {instruction} */;");
- }
- }
-
- // Process successors in order
- foreach (var successor in block.Successors)
- {
- // Only process successors that haven't been processed yet
- if (!processedBlocks.Contains(successor.Address))
- {
- GenerateBlockCode(function, successor, result, indentLevel, processedBlocks);
- }
- }
- }
- }
-
- ///
- /// Generates code for an if-else structure
- ///
- /// The function containing the structure
- /// The if-else structure to generate code for
- /// The string builder to append to
- /// The current indentation level
- /// Set of blocks that have already been processed
- private void GenerateIfElseCode(Function function, ControlFlowAnalyzer.IfElseStructure ifElseStructure, StringBuilder result, int indentLevel, HashSet processedBlocks)
- {
- // Mark the condition block as processed
- processedBlocks.Add(ifElseStructure.ConditionBlock.Address);
-
- // Generate the condition expression
- string condition = GenerateConditionExpression(function, ifElseStructure.ConditionBlock);
-
- // Add the if statement
- string indent = new string(' ', indentLevel * 4);
- result.AppendLine($"{indent}if ({condition})");
- result.AppendLine($"{indent}{{");
-
- // Check if the 'then' branch contains a nested if-else structure
- if (ifElseStructure.NestedThenStructure != null)
- {
- // Generate code for the nested if-else structure in the 'then' branch
- GenerateIfElseCode(function, ifElseStructure.NestedThenStructure, result, indentLevel + 1, processedBlocks);
- }
- else
- {
- // Generate code for the 'then' branch normally
- GenerateBlockCode(function, ifElseStructure.ThenBlock, result, indentLevel + 1, processedBlocks);
- }
-
- // Close the 'then' branch
- result.AppendLine($"{indent}}}");
-
- // Add the 'else' branch if it exists and is not already processed
- if (ifElseStructure.ElseBlock != null && !processedBlocks.Contains(ifElseStructure.ElseBlock.Address))
- {
- result.AppendLine($"{indent}else");
- result.AppendLine($"{indent}{{");
-
- // Check if the 'else' branch contains a nested if-else structure (else-if)
- if (ifElseStructure.NestedElseStructure != null)
- {
- // Generate code for the nested if-else structure in the 'else' branch
- GenerateIfElseCode(function, ifElseStructure.NestedElseStructure, result, indentLevel + 1, processedBlocks);
- }
- else
- {
- // Generate code for the 'else' branch normally
- GenerateBlockCode(function, ifElseStructure.ElseBlock, result, indentLevel + 1, processedBlocks);
- }
-
- // Close the 'else' branch
- result.AppendLine($"{indent}}}");
- }
-
- // If this is a complete if-else structure with a merge point, and the merge point hasn't been processed yet
- if (ifElseStructure.IsComplete && ifElseStructure.MergeBlock != null &&
- !processedBlocks.Contains(ifElseStructure.MergeBlock.Address))
- {
- // Generate code for the merge block
- GenerateBlockCode(function, ifElseStructure.MergeBlock, result, indentLevel, processedBlocks);
- }
- }
-
- ///
- /// Generates code for a switch structure
- ///
- /// The function containing the structure
- /// The switch structure to generate code for
- /// The string builder to append to
- /// The current indentation level
- /// Set of blocks that have already been processed
- private void GenerateSwitchCode(Function function, ControlFlowAnalyzer.SwitchStructure switchStructure, StringBuilder result, int indentLevel, HashSet processedBlocks)
- {
- // Mark the header block as processed
- processedBlocks.Add(switchStructure.HeaderBlock.Address);
-
- // Generate the switch expression
- string switchExpr = "/* switch expression */";
-
- // Add the switch statement
- string indent = new string(' ', indentLevel * 4);
- result.AppendLine($"{indent}// Switch structure at 0x{switchStructure.HeaderBlock.Address:X8}")
- .AppendLine($"{indent}switch ({switchExpr})");
-
- // Add the switch body
- result.AppendLine($"{indent}{{")
- .AppendLine();
-
- // Generate code for each case
- foreach (var switchCase in switchStructure.Cases)
- {
- // Add the case label
- result.AppendLine($"{indent} case {switchCase.Value}:")
- .AppendLine($"{indent} // Case block at 0x{switchCase.CaseBlock.Address:X8}");
-
- // Generate code for the case block
- GenerateBlockCode(function, switchCase.CaseBlock, result, indentLevel + 2, processedBlocks);
-
- // Add a break statement
- result.AppendLine($"{indent} break;")
- .AppendLine();
- }
-
- // Add a default case
- result.AppendLine($"{indent} default:")
- .AppendLine($"{indent} // Default case")
- .AppendLine($"{indent} break;");
-
- // Close the switch body
- result.AppendLine($"{indent}}}");
- }
-
- ///
- /// Generates code for a loop structure
- ///
- /// The function containing the structure
- /// The loop to generate code for
- /// The string builder to append to
- /// The current indentation level
- /// Set of blocks that have already been processed
- private void GenerateLoopCode(Function function, AnalyzerContext.Loop loop, StringBuilder result, int indentLevel, HashSet processedBlocks)
- {
- // Mark the header block as processed
- processedBlocks.Add(loop.Header.Address);
-
- // Add the loop header
- string indent = new string(' ', indentLevel * 4);
- result.AppendLine($"{indent}// Loop at 0x{loop.Header.Address:X8}")
- .AppendLine($"{indent}while (true) // Simplified loop condition");
-
- // Add the loop body
- result.AppendLine($"{indent}{{")
- .AppendLine($"{indent} // Loop body");
-
- // Generate code for the loop body (starting with the header)
- GenerateBlockCode(function, loop.Header, result, indentLevel + 1, processedBlocks);
-
- // Close the loop body
- result.AppendLine($"{indent}}}");
- }
-
- ///
- /// Generates a condition expression for an if statement
- ///
- /// The function containing the block
- /// The block containing the condition
- /// A string representing the condition expression
- private string GenerateConditionExpression(Function function, InstructionBlock conditionBlock)
- {
- // If the block is empty, return a placeholder
- if (conditionBlock.Instructions.Count == 0)
- {
- return "condition";
- }
-
- // Get the last instruction (should be a conditional jump)
- var lastInstruction = conditionBlock.Instructions[^1];
-
- // If it's not a conditional jump, return a placeholder
- if (!IsConditionalJump(lastInstruction.Type))
- {
- return "condition";
- }
-
- // Look for a CMP or TEST instruction that sets the flags for this jump
- Instruction? comparisonInstruction = null;
-
- // Search backwards from the jump instruction to find a comparison
- for (int i = conditionBlock.Instructions.Count - 2; i >= 0; i--)
- {
- var instruction = conditionBlock.Instructions[i];
- if (instruction.Type == InstructionType.Cmp || instruction.Type == InstructionType.Test)
- {
- comparisonInstruction = instruction;
- break;
- }
- }
-
- // If we found a comparison instruction, generate a condition based on it and the jump
- if (comparisonInstruction != null && comparisonInstruction.StructuredOperands.Count >= 2)
- {
- var left = FormatOperand(comparisonInstruction.StructuredOperands[0]);
- var right = FormatOperand(comparisonInstruction.StructuredOperands[1]);
-
- // Generate condition based on jump type
- return GenerateConditionFromJump(lastInstruction, left, right);
- }
-
- // If we couldn't find a comparison instruction, just use the jump condition
- return GenerateConditionFromJump(lastInstruction, null, null);
- }
-
- ///
- /// Generates pseudocode for a single instruction
- ///
- /// The function containing the instruction
- /// The instruction to generate pseudocode for
- /// The generated pseudocode
- private string GenerateInstructionPseudocode(Function function, Instruction instruction)
- {
- // Check for special cases first
- if (instruction.Type == InstructionType.Xor && instruction.StructuredOperands.Count >= 2)
- {
- var dest = instruction.StructuredOperands[0];
- var src = instruction.StructuredOperands[1];
-
- // Check for XOR with self (zeroing a register)
- if (dest is RegisterOperand regDest && src is RegisterOperand regSrc &&
- regDest.Register == regSrc.Register)
- {
- // This is a common idiom to zero a register
- return $"{FormatOperand(dest)} = 0; // XOR with self to zero register";
- }
- }
-
- // Handle different instruction types
- switch (instruction.Type)
- {
- case InstructionType.Mov:
- // Handle MOV instruction
- if (instruction.StructuredOperands.Count >= 2)
- {
- var dest = instruction.StructuredOperands[0];
- var src = instruction.StructuredOperands[1];
-
- // Special case for moving 0 (common initialization pattern)
- if (src is ImmediateOperand immSrc && immSrc.Value == 0)
- {
- return $"{FormatOperand(dest)} = 0; // Initialize to zero";
- }
-
- return $"{FormatOperand(dest)} = {FormatOperand(src)};";
- }
- break;
-
- case InstructionType.Add:
- // Handle ADD instruction
- if (instruction.StructuredOperands.Count >= 2)
- {
- var dest = instruction.StructuredOperands[0];
- var src = instruction.StructuredOperands[1];
-
- // Special case for adding 1 (increment)
- if (src is ImmediateOperand immSrc && immSrc.Value == 1)
- {
- return $"{FormatOperand(dest)}++; // Increment";
- }
-
- return $"{FormatOperand(dest)} += {FormatOperand(src)};";
- }
- break;
-
- case InstructionType.Sub:
- // Handle SUB instruction
- if (instruction.StructuredOperands.Count >= 2)
- {
- var dest = instruction.StructuredOperands[0];
- var src = instruction.StructuredOperands[1];
-
- // Special case for subtracting 1 (decrement)
- if (src is ImmediateOperand immSrc && immSrc.Value == 1)
- {
- return $"{FormatOperand(dest)}--; // Decrement";
- }
-
- return $"{FormatOperand(dest)} -= {FormatOperand(src)};";
- }
- break;
-
- case InstructionType.And:
- // Handle AND instruction
- if (instruction.StructuredOperands.Count >= 2)
- {
- var dest = instruction.StructuredOperands[0];
- var src = instruction.StructuredOperands[1];
-
- return $"{FormatOperand(dest)} &= {FormatOperand(src)};";
- }
- break;
-
- case InstructionType.Or:
- // Handle OR instruction
- if (instruction.StructuredOperands.Count >= 2)
- {
- var dest = instruction.StructuredOperands[0];
- var src = instruction.StructuredOperands[1];
-
- return $"{FormatOperand(dest)} |= {FormatOperand(src)};";
- }
- break;
-
- case InstructionType.Xor:
- // Handle XOR instruction
- if (instruction.StructuredOperands.Count >= 2)
- {
- var dest = instruction.StructuredOperands[0];
- var src = instruction.StructuredOperands[1];
-
- // We already handled the special case of XOR with self above
- return $"{FormatOperand(dest)} ^= {FormatOperand(src)};";
- }
- break;
-
- case InstructionType.Test:
- // Handle TEST instruction (no assignment, just sets flags)
- if (instruction.StructuredOperands.Count >= 2)
- {
- var left = instruction.StructuredOperands[0];
- var right = instruction.StructuredOperands[1];
-
- // Special case for TEST with self (checking if a register is zero)
- if (left is RegisterOperand regLeft && right is RegisterOperand regRight &&
- regLeft.Register == regRight.Register)
- {
- return $"// Check if {FormatOperand(left)} is zero";
- }
-
- return $"// Test {FormatOperand(left)} & {FormatOperand(right)}";
- }
- break;
-
- case InstructionType.Cmp:
- // Handle CMP instruction (no assignment, just sets flags)
- if (instruction.StructuredOperands.Count >= 2)
- {
- var left = instruction.StructuredOperands[0];
- var right = instruction.StructuredOperands[1];
-
- // For CMP, we'll return a comment that explains what's being compared
- // This will help with understanding the following conditional jumps
- return $"// Compare {FormatOperand(left)} with {FormatOperand(right)}";
- }
- break;
-
- case InstructionType.Call:
- // Handle CALL instruction
- if (instruction.StructuredOperands.Count >= 1)
- {
- var target = instruction.StructuredOperands[0];
-
- // For function calls, we'll generate a proper function call expression
- return $"{FormatOperand(target)}(); // Function call";
- }
- break;
-
- case InstructionType.Ret:
- // Handle RET instruction
- return "return 0; // Placeholder return value";
-
- case InstructionType.Push:
- // Handle PUSH instruction
- if (instruction.StructuredOperands.Count >= 1)
- {
- var src = instruction.StructuredOperands[0];
- return $"// Push {FormatOperand(src)} onto stack";
- }
- break;
-
- case InstructionType.Pop:
- // Handle POP instruction
- if (instruction.StructuredOperands.Count >= 1)
- {
- var dest = instruction.StructuredOperands[0];
- return $"{FormatOperand(dest)} = pop(); // Pop from stack";
- }
- break;
-
- case InstructionType.Inc:
- // Handle INC instruction
- if (instruction.StructuredOperands.Count >= 1)
- {
- var dest = instruction.StructuredOperands[0];
- return $"{FormatOperand(dest)}++;";
- }
- break;
-
- case InstructionType.Dec:
- // Handle DEC instruction
- if (instruction.StructuredOperands.Count >= 1)
- {
- var dest = instruction.StructuredOperands[0];
- return $"{FormatOperand(dest)}--;";
- }
- break;
-
- case InstructionType.Shl:
- // Handle SHL/SAL instruction (shift left)
- if (instruction.StructuredOperands.Count >= 2)
- {
- var dest = instruction.StructuredOperands[0];
- var count = instruction.StructuredOperands[1];
- return $"{FormatOperand(dest)} <<= {FormatOperand(count)};";
- }
- break;
-
- case InstructionType.Shr:
- // Handle SHR instruction (shift right logical)
- if (instruction.StructuredOperands.Count >= 2)
- {
- var dest = instruction.StructuredOperands[0];
- var count = instruction.StructuredOperands[1];
- return $"{FormatOperand(dest)} >>>= {FormatOperand(count)}; // Logical shift right";
- }
- break;
-
- case InstructionType.Sar:
- // Handle SAR instruction (shift right arithmetic)
- if (instruction.StructuredOperands.Count >= 2)
- {
- var dest = instruction.StructuredOperands[0];
- var count = instruction.StructuredOperands[1];
- return $"{FormatOperand(dest)} >>= {FormatOperand(count)}; // Arithmetic shift right";
- }
- break;
-
- default:
- // For other instructions, just add a comment
- return $"// {instruction}";
- }
-
- return string.Empty;
- }
-
- ///
- /// Formats an operand for display in pseudocode
- ///
- /// The operand to format
- /// A string representation of the operand
- private string FormatOperand(Operand operand)
- {
- if (operand is RegisterOperand regOp)
- {
- // Format register operand
- return RegisterMapper.GetRegisterName(regOp.Register, 32);
- }
- else if (operand is ImmediateOperand immOp)
- {
- // Format immediate operand
- return $"0x{immOp.Value:X}";
- }
- else if (operand is DisplacementMemoryOperand dispOp)
- {
- // Format displacement memory operand
- string baseReg = RegisterMapper.GetRegisterName(dispOp.BaseRegister, 32);
- return $"*({baseReg} + 0x{dispOp.Displacement:X})";
- }
- else if (operand is BaseRegisterMemoryOperand baseOp)
- {
- // Format base register memory operand
- string baseReg = RegisterMapper.GetRegisterName(baseOp.BaseRegister, 32);
- return $"*({baseReg})";
- }
-
- // Default formatting
- return operand.ToString();
- }
-
- ///
- /// Checks if an instruction is part of the function prologue or epilogue
- ///
- /// The instruction to check
- /// True if the instruction is part of the prologue or epilogue, false otherwise
- private bool IsPrologueOrEpilogueInstruction(Instruction instruction)
- {
- // Check for common prologue/epilogue instructions
- if (instruction.Type == InstructionType.Push &&
- instruction.StructuredOperands.Count > 0 &&
- instruction.StructuredOperands[0] is RegisterOperand reg &&
- reg.Register == RegisterIndex.Bp)
- {
- return true; // push ebp
- }
-
- if (instruction.Type == InstructionType.Mov &&
- instruction.StructuredOperands.Count > 1 &&
- instruction.StructuredOperands[0] is RegisterOperand destReg &&
- instruction.StructuredOperands[1] is RegisterOperand srcReg &&
- destReg.Register == RegisterIndex.Bp &&
- srcReg.Register == RegisterIndex.Sp)
- {
- return true; // mov ebp, esp
- }
-
- if (instruction.Type == InstructionType.Pop &&
- instruction.StructuredOperands.Count > 0 &&
- instruction.StructuredOperands[0] is RegisterOperand popReg &&
- popReg.Register == RegisterIndex.Bp)
- {
- return true; // pop ebp
- }
-
- if (instruction.Type == InstructionType.Ret)
- {
- return true; // ret
- }
-
- return false;
- }
-
- ///
- /// Checks if the given instruction type is a conditional jump
- ///
- /// The instruction type
- /// True if the instruction is a conditional jump, false otherwise
- private bool IsConditionalJump(InstructionType type)
- {
- // Check for common conditional jumps
- return type == InstructionType.Jz ||
- type == InstructionType.Jnz ||
- type == InstructionType.Jg ||
- type == InstructionType.Jge ||
- type == InstructionType.Jl ||
- type == InstructionType.Jle ||
- type == InstructionType.Ja ||
- type == InstructionType.Jae ||
- type == InstructionType.Jb ||
- type == InstructionType.Jbe ||
- type == InstructionType.Jo ||
- type == InstructionType.Jno ||
- type == InstructionType.Js ||
- type == InstructionType.Jns;
- }
-
- ///
- /// Gets the target address of a jump instruction
- ///
- /// The jump instruction
- /// The target address of the jump
- private ulong GetJumpTargetAddress(Instruction instruction)
- {
- // Jump instructions have the target address as their first operand
- if (instruction.StructuredOperands.Count > 0)
- {
- return instruction.StructuredOperands[0].GetValue();
- }
-
- // If we can't determine the target address, return 0
- return 0;
- }
-
- ///
- /// Generates a condition expression based on a conditional jump instruction
- ///
- /// The conditional jump instruction
- /// The left operand of the comparison, if available
- /// The right operand of the comparison, if available
- /// A string representing the condition expression
- private string GenerateConditionFromJump(Instruction instruction, string? left = null, string? right = null)
- {
- // If we don't have comparison operands, use a generic condition
- if (left == null || right == null)
- {
- switch (instruction.Type)
- {
- case InstructionType.Jz: return "zero flag is set";
- case InstructionType.Jnz: return "zero flag is not set";
- default: return "condition";
- }
- }
-
- // If we have comparison operands, generate a more specific condition
- switch (instruction.Type)
- {
- case InstructionType.Jz: return $"{left} == 0";
- case InstructionType.Jnz: return $"{left} != 0";
- default: return $"{left} ? {right}";
- }
- }
-}
diff --git a/X86Disassembler/Analysers/VariableAnalyzer.cs b/X86Disassembler/Analysers/VariableAnalyzer.cs
deleted file mode 100644
index 8f9749d..0000000
--- a/X86Disassembler/Analysers/VariableAnalyzer.cs
+++ /dev/null
@@ -1,252 +0,0 @@
-using X86Disassembler.Analysers.DecompilerTypes;
-using X86Disassembler.X86;
-using X86Disassembler.X86.Operands;
-
-namespace X86Disassembler.Analysers;
-
-///
-/// Analyzes disassembled code to identify and track variables
-///
-public class VariableAnalyzer
-{
- ///
- /// The analyzer context
- ///
- private readonly AnalyzerContext _context;
-
- ///
- /// Creates a new variable analyzer
- ///
- /// The analyzer context
- public VariableAnalyzer(AnalyzerContext context)
- {
- _context = context;
- }
-
- ///
- /// Analyzes the function to identify stack variables
- ///
- /// The function to analyze
- public void AnalyzeStackVariables(Function function)
- {
- // Dictionary to track stack offsets and their corresponding variables
- var stackOffsets = new Dictionary();
-
- // First, identify the function prologue to determine stack frame setup
- bool hasPushEbp = false;
- bool hasMovEbpEsp = false;
- int localSize = 0;
-
- // Look for the function prologue pattern: push ebp; mov ebp, esp; sub esp, X
- foreach (var block in function.AsmFunction.Blocks)
- {
- foreach (var instruction in block.Instructions)
- {
- // Look for push ebp
- if (instruction.Type == InstructionType.Push &&
- instruction.StructuredOperands.Count > 0 &&
- instruction.StructuredOperands[0] is RegisterOperand regOp &&
- regOp.Register == RegisterIndex.Bp)
- {
- hasPushEbp = true;
- continue;
- }
-
- // Look for mov ebp, esp
- if (instruction.Type == InstructionType.Mov &&
- instruction.StructuredOperands.Count > 1 &&
- instruction.StructuredOperands[0] is RegisterOperand destReg &&
- instruction.StructuredOperands[1] is RegisterOperand srcReg &&
- destReg.Register == RegisterIndex.Bp &&
- srcReg.Register == RegisterIndex.Sp)
- {
- hasMovEbpEsp = true;
- continue;
- }
-
- // Look for sub esp, X to determine local variable space
- if (instruction.Type == InstructionType.Sub &&
- instruction.StructuredOperands.Count > 1 &&
- instruction.StructuredOperands[0] is RegisterOperand subReg &&
- instruction.StructuredOperands[1] is ImmediateOperand immOp &&
- subReg.Register == RegisterIndex.Sp)
- {
- localSize = (int)immOp.Value;
- break;
- }
- }
-
- // If we found the complete prologue, no need to check more blocks
- if (hasPushEbp && hasMovEbpEsp && localSize > 0)
- {
- break;
- }
- }
-
- // If we didn't find a standard prologue, we can't reliably analyze stack variables
- if (!hasPushEbp || !hasMovEbpEsp)
- {
- return;
- }
-
- // Now scan for memory accesses relative to EBP
- foreach (var block in function.AsmFunction.Blocks)
- {
- foreach (var instruction in block.Instructions)
- {
- // Look for memory operands that reference [ebp+X] or [ebp-X]
- foreach (var operand in instruction.StructuredOperands)
- {
- if (operand is DisplacementMemoryOperand memOp &&
- memOp.BaseRegister == RegisterIndex.Bp)
- {
- // This is accessing memory relative to EBP
- int offset = (int)memOp.Displacement;
-
- // Determine if this is a parameter or local variable
- if (offset > 0 && offset < 1000) // Positive offset = parameter (with reasonable limit)
- {
- // Parameters start at [ebp+8] (return address at [ebp+4], saved ebp at [ebp+0])
- int paramIndex = (offset - 8) / 4; // Assuming 4-byte parameters
-
- // Make sure we have enough parameters in the function
- while (function.Parameters.Count <= paramIndex)
- {
- var param = new Variable($"param_{function.Parameters.Count + 1}", DataType.Unknown)
- {
- Storage = Variable.StorageType.Parameter,
- StackOffset = 8 + (function.Parameters.Count * 4),
- IsParameter = true,
- ParameterIndex = function.Parameters.Count,
- Size = 4 // Assume 4 bytes (32-bit)
- };
- function.Parameters.Add(param);
- }
- }
- else if (offset < 0 && offset > -1000) // Negative offset = local variable (with reasonable limit)
- {
- // Check if we've already seen this offset
- if (!stackOffsets.TryGetValue(offset, out var variable))
- {
- // Create a new local variable
- variable = new Variable($"local_{Math.Abs(offset)}", DataType.Unknown)
- {
- Storage = Variable.StorageType.Stack,
- StackOffset = offset,
- Size = 4 // Assume 4 bytes (32-bit)
- };
-
- // Add to our tracking dictionaries
- stackOffsets[offset] = variable;
- function.LocalVariables.Add(variable);
- }
-
- // Track the usage of this variable
- TrackVariableUsage(variable, instruction);
- }
- }
- }
- }
- }
-
- // Analyze register-based variables
- AnalyzeRegisterVariables(function);
- }
-
- ///
- /// Analyzes register usage to identify variables stored in registers
- ///
- /// The function to analyze
- private void AnalyzeRegisterVariables(Function function)
- {
- // This is a more complex analysis that would track register values across blocks
- // For now, we'll focus on identifying registers that hold consistent values
-
- // Dictionary to track register variables
- var registerVariables = new Dictionary();
-
- // For each block, analyze register usage
- foreach (var block in function.AsmFunction.Blocks)
- {
- // Check if we have register values for this block from data flow analysis
- var registerValuesKey = "RegisterValues";
- if (_context.GetAnalysisData>(block.Address, registerValuesKey) is Dictionary registerValues)
- {
- foreach (var kvp in registerValues)
- {
- var register = kvp.Key;
- var valueInfo = kvp.Value;
-
- // Skip special registers like ESP and EBP
- if (register == RegisterIndex.Sp || register == RegisterIndex.Bp)
- {
- continue;
- }
-
- // If the register holds a constant value, it might be a variable
- if (valueInfo.Type == DataFlowAnalyzer.ValueInfo.ValueType.Constant)
- {
- // Check if we already have a variable for this register
- if (!registerVariables.TryGetValue(register, out var variable))
- {
- // Create a new register variable
- variable = new Variable($"reg_{RegisterMapper.GetRegisterName(register, 32)}", DataType.Unknown)
- {
- Storage = Variable.StorageType.Register,
- Register = register,
- Size = 4 // Assume 4 bytes (32-bit)
- };
-
- // Add to our tracking dictionary
- registerVariables[register] = variable;
- function.RegisterVariables.Add(variable);
- }
- }
- }
- }
- }
- }
-
- ///
- /// Tracks how a variable is used in an instruction
- ///
- /// The variable to track
- /// The instruction using the variable
- private void TrackVariableUsage(Variable variable, Instruction instruction)
- {
- // For now, we'll just try to infer the variable type based on its usage
-
- // If the variable is used in a comparison with 0, it might be a boolean
- if (instruction.Type == InstructionType.Cmp || instruction.Type == InstructionType.Test)
- {
- if (instruction.StructuredOperands.Count > 1 &&
- instruction.StructuredOperands[1] is ImmediateOperand immOp &&
- immOp.Value == 0)
- {
- // This might be a boolean check
- if (variable.Type == DataType.Unknown)
- {
- // Set to int for now as we don't have a bool type
- variable.Type = DataType.Int;
- }
- }
- }
-
- // If the variable is used with string instructions, it might be a string
- // Check for string operations - we don't have specific string instruction types yet
- // Skip string detection for now as we don't have the specific instruction types
- // We'll detect strings through other means later
-
- // If the variable is used with floating-point instructions, it might be a float
- // Check for floating-point operations
- if (instruction.Type == InstructionType.Fld ||
- instruction.Type == InstructionType.Fst ||
- instruction.Type == InstructionType.Fstp)
- {
- if (variable.Type == DataType.Unknown)
- {
- variable.Type = DataType.Float;
- }
- }
- }
-}
diff --git a/X86Disassembler/Program.cs b/X86Disassembler/Program.cs
index 2fa363d..e321f0d 100644
--- a/X86Disassembler/Program.cs
+++ b/X86Disassembler/Program.cs
@@ -1,5 +1,6 @@
using X86Disassembler.Analysers;
using X86Disassembler.PE;
+using X86Disassembler.ProjectSystem;
using X86Disassembler.X86;
namespace X86Disassembler;
@@ -11,7 +12,7 @@ public class Program
{
// Hardcoded file path for testing
private const string FilePath = @"C:\Program Files (x86)\Nikita\Iron Strategy\Terrain.dll";
-
+
///
/// Main entry point
///
@@ -20,18 +21,18 @@ public class Program
{
Console.WriteLine("X86 Disassembler and Decompiler");
Console.WriteLine("--------------------------------");
-
+
// Load the file
Console.WriteLine($"Loading file: {FilePath}");
byte[] fileBytes = File.ReadAllBytes(FilePath);
Console.WriteLine($"Successfully loaded {FilePath}");
Console.WriteLine($"File size: {fileBytes.Length} bytes\n");
-
+
// Parse the PE format
Console.WriteLine("Parsing PE format...\n");
PeFile peFile = new PeFile(fileBytes);
peFile.Parse();
-
+
// Print PE file information
Console.WriteLine("PE File Information:");
Console.WriteLine($"Architecture: {(peFile.OptionalHeader.Is64Bit() ? "64-bit" : "32-bit")}");
@@ -48,7 +49,17 @@ public class Program
// Print import information
PrintPeImports(peFile);
-
+
+ var projectPeFile = new ProjectPeFile()
+ {
+ ImageBase = new VirtualAddress(0, peFile.OptionalHeader.ImageBase),
+ Architecture = peFile.OptionalHeader.Is64Bit()
+ ? "64-bit"
+ : "32-bit",
+ Name = Path.GetFileName(FilePath),
+ EntryPointAddress = new FileAbsoluteAddress(peFile.OptionalHeader.AddressOfEntryPoint, peFile.OptionalHeader.ImageBase)
+ };
+
// Find code sections
var codeSections = peFile.SectionHeaders.FindAll(s => s.ContainsCode());
Console.WriteLine($"Found {codeSections.Count} code section(s):");
@@ -56,74 +67,34 @@ public class Program
{
Console.WriteLine($" - {section.Name}: Size={section.VirtualSize} bytes, RVA=0x{section.VirtualAddress:X8}");
}
+
Console.WriteLine();
-
+
+ var projectPeFileSections = peFile.SectionHeaders.Select(
+ x => new ProjectPeFileSection()
+ {
+ Name = x.Name,
+ Flags = (x.ContainsCode() ? SectionFlags.Code : SectionFlags.None) |
+ (x.IsReadable() ? SectionFlags.Read : SectionFlags.None) |
+ (x.IsWritable() ? SectionFlags.Write : SectionFlags.None) |
+ (x.IsExecutable() ? SectionFlags.Exec : SectionFlags.None) ,
+ VirtualAddress = new VirtualAddress(x.VirtualAddress, peFile.OptionalHeader.ImageBase),
+ Size = x.VirtualSize
+ }
+ ).ToList();
+
// Disassemble the first code section
if (codeSections.Count > 0)
{
var section = codeSections[0];
byte[] codeBytes = peFile.GetSectionData(peFile.SectionHeaders.IndexOf(section));
-
- // // First demonstrate sequential disassembly
- // Console.WriteLine($"Sequential disassembly of section {section.Name} at RVA 0x{section.VirtualAddress:X8}:");
- //
- // // Create a disassembler for the code section
- // // Base address should be the section's virtual address, not the image base + VA
- // Disassembler disassembler = new Disassembler(codeBytes, section.VirtualAddress);
- //
- // // Disassemble sequentially (linear approach)
- // var linearInstructions = disassembler.Disassemble();
- //
- // // Print the first 30 instructions from linear disassembly
- // int linearCount = Math.Min(30, linearInstructions.Count);
- // for (int i = 0; i < linearCount; i++)
- // {
- // Console.WriteLine(linearInstructions[i]);
- // }
- //
- // disassemble entry point
- var disassembler = new BlockDisassembler(codeBytes, section.VirtualAddress);
-
- var asmFunction = disassembler.DisassembleFromAddress(peFile.OptionalHeader.AddressOfEntryPoint);
-
- // Run all analyzers on the function
- asmFunction.Analyze();
-
- // Create a decompiler engine
- var decompiler = new DecompilerEngine(peFile);
-
- try
- {
- // Find a suitable exported function to decompile
- // Let's try to find a function that might have more complex control flow
- var exportedFunctions = peFile.ExportedFunctions;
-
- // Print all exported functions to help us choose a better one
- Console.WriteLine("Available exported functions:");
- foreach (var func in exportedFunctions)
- {
- Console.WriteLine($" - {func.Name} (RVA=0x{func.AddressRva:X8})");
- }
-
- // Decompile the entry point function
- Console.WriteLine($"\nDecompiling entry point function at address 0x{peFile.OptionalHeader.AddressOfEntryPoint:X8}\n");
-
- // Decompile the entry point function
- var function = decompiler.DecompileFunction(peFile.OptionalHeader.AddressOfEntryPoint);
- // Generate pseudocode
- var pseudocode = decompiler.GeneratePseudocode(function);
- Console.WriteLine("\nGenerated Pseudocode:\n");
- Console.WriteLine(pseudocode);
- }
- catch (Exception ex)
- {
- Console.WriteLine($"Error decompiling function: {ex.Message}");
- }
-
- // Skip displaying detailed loop information to keep output concise
+ var disassembler = new BlockDisassembler(codeBytes, section.VirtualAddress);
+
+ var asmFunction = disassembler.DisassembleFromAddress(peFile.OptionalHeader.AddressOfEntryPoint);
+ Console.WriteLine(asmFunction);
}
-
+
// Console.WriteLine("\nPress Enter to exit...");
// Console.ReadLine();
}
@@ -136,7 +107,7 @@ public class Program
foreach (var import in peFile.ImportDescriptors)
{
Console.WriteLine($" DLL: {import.DllName}");
-
+
for (int i = 0; i < import.Functions.Count; i++)
{
var function = import.Functions[i];
@@ -150,6 +121,7 @@ public class Program
}
}
}
+
Console.WriteLine();
}
@@ -159,12 +131,13 @@ public class Program
Console.WriteLine($"DLL Name: {peFile.ExportDirectory.DllName}");
Console.WriteLine($"Number of Functions: {peFile.ExportDirectory.NumberOfFunctions}");
Console.WriteLine($"Number of Names: {peFile.ExportDirectory.NumberOfNames}");
-
+
for (int i = 0; i < peFile.ExportedFunctions.Count; i++)
{
var export = peFile.ExportedFunctions[i];
Console.WriteLine($" {i}: {export.Name} (Ordinal={export.Ordinal}, RVA=0x{export.AddressRva:X8})");
}
+
Console.WriteLine();
}
@@ -178,9 +151,10 @@ public class Program
if (section.IsExecutable()) flags += "Exec ";
if (section.IsReadable()) flags += "Read ";
if (section.IsWritable()) flags += "Write";
-
+
Console.WriteLine($" {peFile.SectionHeaders.IndexOf(section)}: {section.Name,-8} VA=0x{section.VirtualAddress:X8} Size={section.VirtualSize,-8} [{flags}]");
}
+
Console.WriteLine();
}
}
\ No newline at end of file
diff --git a/X86Disassembler/ProjectSystem/ProjectPeFile.cs b/X86Disassembler/ProjectSystem/ProjectPeFile.cs
new file mode 100644
index 0000000..4551a5a
--- /dev/null
+++ b/X86Disassembler/ProjectSystem/ProjectPeFile.cs
@@ -0,0 +1,77 @@
+using X86Disassembler.Analysers;
+
+namespace X86Disassembler.ProjectSystem;
+
+/// <summary>
+/// Project-level summary of a loaded PE file.
+/// </summary>
+public class ProjectPeFile
+{
+    /// <summary>
+    /// File name of the PE image (for example <c>Terrain.dll</c>).
+    /// </summary>
+    public string Name { get; set; } = string.Empty;
+
+    /// <summary>
+    /// Human-readable architecture label, such as "32-bit" or "64-bit".
+    /// </summary>
+    public string Architecture { get; set; } = string.Empty;
+
+    /// <summary>
+    /// Address of the image entry point.
+    /// </summary>
+    public Address EntryPointAddress { get; set; } = null!;
+
+    /// <summary>
+    /// Image base address taken from the PE optional header.
+    /// </summary>
+    public Address ImageBase { get; set; } = null!;
+}
+
+/// <summary>
+/// Project-level summary of a single section of a PE file.
+/// </summary>
+public class ProjectPeFileSection
+{
+    /// <summary>
+    /// Section name as stored in the section header.
+    /// </summary>
+    public string Name { get; set; } = string.Empty;
+
+    /// <summary>
+    /// Address at which the section starts.
+    /// </summary>
+    public Address VirtualAddress { get; set; } = null!;
+
+    /// <summary>
+    /// Size of the section in bytes.
+    /// </summary>
+    public ulong Size { get; set; }
+
+    /// <summary>
+    /// Combination of attributes that apply to the section.
+    /// </summary>
+    public SectionFlags Flags { get; set; }
+}
+
+/// <summary>
+/// Attributes of a PE section; individual values can be combined.
+/// </summary>
+[Flags]
+public enum SectionFlags
+{
+    /// <summary>No attributes set.</summary>
+    None = 0,
+
+    /// <summary>The section contains code.</summary>
+    Code = 1,
+
+    /// <summary>The section is executable.</summary>
+    Exec = 2,
+
+    /// <summary>The section is readable.</summary>
+    Read = 4,
+
+    /// <summary>The section is writable.</summary>
+    Write = 8
+}
\ No newline at end of file