From 1c7054781c4a58184673660aeed7d982b75bed2b Mon Sep 17 00:00:00 2001 From: bird_egop Date: Sat, 19 Apr 2025 02:12:46 +0300 Subject: [PATCH] changes all over the place --- X86Disassembler/Analysers/AnalyzerContext.cs | 125 --- X86Disassembler/Analysers/AsmFunction.cs | 67 +- .../Analysers/ControlFlowAnalyzer.cs | 303 ------ X86Disassembler/Analysers/DataFlowAnalyzer.cs | 384 -------- X86Disassembler/Analysers/DecompilerEngine.cs | 149 --- .../DecompilerTypes/CallingConvention.cs | 58 -- .../Analysers/DecompilerTypes/DataType.cs | 190 ---- .../Analysers/DecompilerTypes/Function.cs | 98 -- .../Analysers/DecompilerTypes/Variable.cs | 102 --- .../Analysers/FileAbsoluteAddress.cs | 56 ++ X86Disassembler/Analysers/FunctionAnalyzer.cs | 132 --- X86Disassembler/Analysers/LoopAnalyzer.cs | 120 --- .../Analysers/PseudocodeGenerator.cs | 862 ------------------ X86Disassembler/Analysers/VariableAnalyzer.cs | 252 ----- X86Disassembler/Program.cs | 110 +-- .../ProjectSystem/ProjectPeFile.cs | 35 + 16 files changed, 134 insertions(+), 2909 deletions(-) delete mode 100644 X86Disassembler/Analysers/AnalyzerContext.cs delete mode 100644 X86Disassembler/Analysers/ControlFlowAnalyzer.cs delete mode 100644 X86Disassembler/Analysers/DataFlowAnalyzer.cs delete mode 100644 X86Disassembler/Analysers/DecompilerEngine.cs delete mode 100644 X86Disassembler/Analysers/DecompilerTypes/CallingConvention.cs delete mode 100644 X86Disassembler/Analysers/DecompilerTypes/DataType.cs delete mode 100644 X86Disassembler/Analysers/DecompilerTypes/Function.cs delete mode 100644 X86Disassembler/Analysers/DecompilerTypes/Variable.cs create mode 100644 X86Disassembler/Analysers/FileAbsoluteAddress.cs delete mode 100644 X86Disassembler/Analysers/FunctionAnalyzer.cs delete mode 100644 X86Disassembler/Analysers/LoopAnalyzer.cs delete mode 100644 X86Disassembler/Analysers/PseudocodeGenerator.cs delete mode 100644 X86Disassembler/Analysers/VariableAnalyzer.cs create mode 100644 X86Disassembler/ProjectSystem/ProjectPeFile.cs diff --git a/X86Disassembler/Analysers/AnalyzerContext.cs b/X86Disassembler/Analysers/AnalyzerContext.cs deleted file mode 100644 index 18f2b1f..0000000 --- a/X86Disassembler/Analysers/AnalyzerContext.cs +++ /dev/null @@ -1,125 +0,0 @@ -namespace X86Disassembler.Analysers; - -/// -/// Central context for all analysis data related to a disassembled function -/// -public class AnalyzerContext -{ - /// - /// The function being analyzed - /// - public AsmFunction Function { get; } - - /// - /// Dictionary mapping block addresses to instruction blocks - /// - public Dictionary BlocksByAddress { get; } = []; - - /// - /// Dictionary mapping loop header addresses to loops - /// - public Dictionary LoopsByHeaderAddress { get; } = []; - - /// - /// Dictionary mapping block addresses to the loops that contain them - /// - public Dictionary> LoopsByBlockAddress { get; } = []; - - /// - /// Dictionary for storing arbitrary analysis data by address - /// - public Dictionary> AnalysisDataByAddress { get; } = []; - - /// - /// Creates a new analyzer context for the given function - /// - /// The function to analyze - public AnalyzerContext(AsmFunction function) - { - Function = function; - - // Initialize the block dictionary - foreach (var block in function.Blocks) - { - BlocksByAddress[block.Address] = block; - } - } - - /// - /// Represents a loop in the control flow graph - /// - public class Loop - { - /// - /// The header block of the loop (the entry point into the loop) - /// - public InstructionBlock Header { get; set; } = null!; - - /// - /// The blocks that are part of this loop - /// - public List Blocks { get; set; } = []; - - /// - /// The back edge that completes the loop (from a block back to the header) - /// - public (InstructionBlock From, InstructionBlock To) BackEdge { get; set; } - - /// - /// The exit blocks of the loop (blocks that have successors outside the loop) - /// - public List ExitBlocks { get; set; } = []; - } - - /// - /// Stores analysis data for a specific address - /// - /// The address to store data for - /// The key for the data - /// The data to store - public void StoreAnalysisData(ulong address, string key, object value) - { - if (!AnalysisDataByAddress.TryGetValue(address, out var dataDict)) - { - dataDict = []; - AnalysisDataByAddress[address] = dataDict; - } - - dataDict[key] = value; - } - - /// - /// Retrieves analysis data for a specific address - /// - /// The address to retrieve data for - /// The key for the data - /// The stored data, or null if not found - public object? GetAnalysisData(ulong address, string key) - { - if (AnalysisDataByAddress.TryGetValue(address, out var dataDict) && - dataDict.TryGetValue(key, out var value)) - { - return value; - } - - return null; - } - - /// - /// Retrieves typed analysis data for a specific address - /// - /// The type of data to retrieve - /// The address to retrieve data for - /// The key for the data - /// The stored data, or default(T) if not found or wrong type - public T? GetAnalysisData(ulong address, string key) - { - var data = GetAnalysisData(address, key); - if (data is T typedData) - { - return typedData; - } - - return default; - } -} diff --git a/X86Disassembler/Analysers/AsmFunction.cs b/X86Disassembler/Analysers/AsmFunction.cs index 12b3e6f..56803bc 100644 --- a/X86Disassembler/Analysers/AsmFunction.cs +++ b/X86Disassembler/Analysers/AsmFunction.cs @@ -15,73 +15,8 @@ public class AsmFunction /// public List Blocks { get; set; } = []; - /// - /// The entry block of the function - /// - public InstructionBlock? EntryBlock => Blocks.FirstOrDefault(b => b.Address == Address); - - /// - /// The exit blocks of the function (blocks that end with a return instruction) - /// - public List ExitBlocks => Blocks.Where(b => - b.Instructions.Count > 0 && - b.Instructions[^1].Type.IsRet()).ToList(); - - /// - /// The analyzer context for this function - /// - public AnalyzerContext Context { get; private set; } - - /// - /// Creates a new AsmFunction instance - /// - public AsmFunction() - { - Context = new AnalyzerContext(this); - } - - /// - /// Analyzes the function using various analyzers - /// - public void Analyze() - { - // Analyze loops - var loopAnalyzer = new LoopAnalyzer(); - loopAnalyzer.AnalyzeLoops(Context); - - // Analyze data flow - var dataFlowAnalyzer = new DataFlowAnalyzer(); - dataFlowAnalyzer.AnalyzeDataFlow(Context); - } - - /// - /// Returns a string representation of the function, including its address, blocks, and analysis results - /// public override string ToString() { - string loopsInfo = ""; - if (Context.LoopsByHeaderAddress.Count > 0) - { - loopsInfo = $"Loops: {Context.LoopsByHeaderAddress.Count}\n"; - int i = 0; - foreach (var loop in Context.LoopsByHeaderAddress.Values) - { - loopsInfo += $" Loop {i++}: Header=0x{loop.Header.Address:X8}, " + - $"Blocks={loop.Blocks.Count}, " + - $"Back Edge=(0x{loop.BackEdge.From.Address:X8} -> 0x{loop.BackEdge.To.Address:X8}), " + - $"Exits={loop.ExitBlocks.Count}\n"; - } - } - else - { - loopsInfo = "Loops: None\n"; - } - - return $"Function at 0x{Address:X8}\n" + - $"Entry Block: 0x{EntryBlock?.Address.ToString("X8") ?? "None"}\n" + - $"Exit Blocks: {(ExitBlocks.Count > 0 ? string.Join(", ", ExitBlocks.Select(b => $"0x{b.Address:X8}")) : "None")}\n" + - $"Total Blocks: {Blocks.Count}\n" + - loopsInfo + - $"{string.Join("\n", Blocks.Select(x => $"\t{x}"))}"; + return $"{Address:X8}\n{string.Join("\n", Blocks)}"; } } \ No newline at end of file diff --git a/X86Disassembler/Analysers/ControlFlowAnalyzer.cs b/X86Disassembler/Analysers/ControlFlowAnalyzer.cs deleted file mode 100644 index 4b3b5c9..0000000 --- a/X86Disassembler/Analysers/ControlFlowAnalyzer.cs +++ /dev/null @@ -1,303 +0,0 @@ -using X86Disassembler.Analysers.DecompilerTypes; -using X86Disassembler.X86; -using X86Disassembler.X86.Operands; - -namespace X86Disassembler.Analysers; - -/// -/// Analyzes control flow structures in disassembled code -/// -public class ControlFlowAnalyzer -{ - /// - /// The analyzer context - /// - private readonly AnalyzerContext _context; - - /// - /// Creates a new control flow analyzer - /// - /// The analyzer context - public ControlFlowAnalyzer(AnalyzerContext context) - { - _context = context; - } - - /// - /// Analyzes the control flow of a function to identify high-level structures - /// - /// The function to analyze - public void AnalyzeControlFlow(Function function) - { - // First, identify if-else structures - IdentifyIfElseStructures(function); - - // Then, identify switch statements - IdentifySwitchStatements(function); - } - - /// - /// Identifies if-else structures in the control flow graph - /// - /// The function to analyze - private void IdentifyIfElseStructures(Function function) - { - // Now analyze each block for conditional jumps - foreach (var block in function.AsmFunction.Blocks) - { - // Get the last instruction in the block - var lastInstruction = block.Instructions.LastOrDefault(); - if (lastInstruction == null) continue; - - // Check if the last instruction is a conditional jump - if (lastInstruction.Type.IsConditionalJump()) - { - // Get the jump target address - ulong targetAddress = GetJumpTargetAddress(lastInstruction); - - // Find the target block - InstructionBlock? targetBlock = null; - foreach (var b in function.AsmFunction.Blocks) - { - if (b.Address == targetAddress) - { - targetBlock = b; - break; - } - } - - if (targetBlock == null) - { - continue; - } - - // Find the fall-through block (should be in the successors) - InstructionBlock? fallThroughBlock = null; - foreach (var successor in block.Successors) - { - if (successor != targetBlock) - { - fallThroughBlock = successor; - break; - } - } - - if (fallThroughBlock == null) - { - continue; - } - - // Create an if-else structure - var ifElseStructure = new IfElseStructure - { - ConditionBlock = block, - ThenBlock = targetBlock, - ElseBlock = fallThroughBlock - }; - - // Store the if-else structure in the analysis context - function.AsmFunction.Context.StoreAnalysisData(block.Address, "IfElseStructure", ifElseStructure); - } - } - - // Second pass: identify nested if-else structures - foreach (var block in function.AsmFunction.Blocks) - { - var ifElseStructure = _context.GetAnalysisData(block.Address, "IfElseStructure"); - if (ifElseStructure != null) - { - // Check if the 'then' block contains another if-else structure - var nestedThenIf = _context.GetAnalysisData(ifElseStructure.ThenBlock.Address, "IfElseStructure"); - if (nestedThenIf != null) - { - ifElseStructure.NestedThenStructure = nestedThenIf; - } - - // Check if the 'else' block contains another if-else structure - if (ifElseStructure.ElseBlock != null) - { - var nestedElseIf = _context.GetAnalysisData(ifElseStructure.ElseBlock.Address, "IfElseStructure"); - if (nestedElseIf != null) - { - ifElseStructure.NestedElseStructure = nestedElseIf; - } - } - } - } - } - - /// - /// Identifies switch statements in the control flow graph - /// - /// The function to analyze - private void IdentifySwitchStatements(Function function) - { - // For each block in the function - foreach (var block in function.AsmFunction.Blocks) - { - // Look for patterns that indicate a switch statement - // Common patterns include: - // 1. A series of compare and jump instructions - // 2. An indirect jump through a jump table - - // For now, we'll focus on the first pattern (series of compares) - if (IsPotentialSwitchHeader(block)) - { - // This is a potential switch statement - var switchStructure = new SwitchStructure - { - HeaderBlock = block, - Cases = [] - }; - - // Find the cases by analyzing the successors - foreach (var successor in block.Successors) - { - // Each successor is a potential case - switchStructure.Cases.Add(new SwitchCase - { - CaseBlock = successor, - Value = 0 // We'd need more analysis to determine the actual value - }); - } - - // Store the switch structure in the context - _context.StoreAnalysisData(block.Address, "SwitchStructure", switchStructure); - } - } - } - - /// - /// Gets the target address of a jump instruction - /// - /// The jump instruction - /// The target address of the jump - private ulong GetJumpTargetAddress(Instruction instruction) - { - // Add debug output to see the instruction and its operands - - // For conditional jumps, the target address is the first operand - if (instruction.StructuredOperands.Count > 0) - { - var operand = instruction.StructuredOperands[0]; - - if (operand is ImmediateOperand immOp) - { - return (ulong)immOp.Value; - } - else if (operand is RelativeOffsetOperand relOp) - { - // For relative jumps, the target address is directly available in the operand - // We need to convert from file offset to RVA by adding 0x1000 (the section offset) - // This matches how the blocks are converted in BlockDisassembler.cs - ulong rvaTargetAddress = relOp.TargetAddress + 0x1000; - return rvaTargetAddress; - } - } - - // If we can't determine the target, return 0 - return 0; - } - - /// - /// Checks if the given block is a potential switch statement header - /// - /// The block to check - /// True if the block is a potential switch header, false otherwise - private bool IsPotentialSwitchHeader(InstructionBlock block) - { - // A switch header typically has multiple successors - if (block.Successors.Count <= 2) - { - return false; - } - - // Look for patterns that indicate a switch statement - // For now, we'll just check if the block ends with an indirect jump - if (block.Instructions.Count > 0) - { - var lastInstruction = block.Instructions[^1]; - if (lastInstruction.Type == InstructionType.Jmp && - lastInstruction.StructuredOperands.Count > 0 && - !(lastInstruction.StructuredOperands[0] is ImmediateOperand)) - { - return true; - } - } - - return false; - } - - /// - /// Represents an if-else structure in the control flow graph - /// - public class IfElseStructure - { - /// - /// The block containing the condition - /// - public InstructionBlock ConditionBlock { get; set; } = null!; - - /// - /// The block representing the 'then' branch (taken when condition is true) - /// - public InstructionBlock ThenBlock { get; set; } = null!; - - /// - /// The block representing the 'else' branch (taken when condition is false) - /// - public InstructionBlock? ElseBlock { get; set; } - - /// - /// The block where both branches merge back together (if applicable) - /// - public InstructionBlock? MergeBlock { get; set; } - - /// - /// Whether this is a complete if-else structure with a merge point - /// - public bool IsComplete { get; set; } - - /// - /// Nested if-else structure in the 'then' branch (if any) - /// - public IfElseStructure? NestedThenStructure { get; set; } - - /// - /// Nested if-else structure in the 'else' branch (if any) - /// - public IfElseStructure? NestedElseStructure { get; set; } - } - - /// - /// Represents a switch statement in the control flow graph - /// - public class SwitchStructure - { - /// - /// The block containing the switch header - /// - public InstructionBlock HeaderBlock { get; set; } = null!; - - /// - /// The cases of the switch statement - /// - public List Cases { get; set; } = []; - } - - /// - /// Represents a case in a switch statement - /// - public class SwitchCase - { - /// - /// The value of the case - /// - public int Value { get; set; } - - /// - /// The block containing the case code - /// - public InstructionBlock CaseBlock { get; set; } = null!; - } -} diff --git a/X86Disassembler/Analysers/DataFlowAnalyzer.cs b/X86Disassembler/Analysers/DataFlowAnalyzer.cs deleted file mode 100644 index 3f51c16..0000000 --- a/X86Disassembler/Analysers/DataFlowAnalyzer.cs +++ /dev/null @@ -1,384 +0,0 @@ -using X86Disassembler.X86; -using X86Disassembler.X86.Operands; - -namespace X86Disassembler.Analysers; - -/// -/// Analyzes data flow through instructions to track register values -/// -public class DataFlowAnalyzer -{ - // Constants for analysis data keys - private const string REGISTER_VALUE_KEY = "RegisterValue"; - private const string MEMORY_VALUE_KEY = "MemoryValue"; - - /// - /// Represents a known value for a register or memory location - /// - public class ValueInfo - { - /// - /// The type of value (constant, register, memory, unknown) - /// - public enum ValueType - { - Unknown, - Constant, - Register, - Memory - } - - /// - /// The type of this value - /// - public ValueType Type { get; set; } = ValueType.Unknown; - - /// - /// The constant value (if Type is Constant) - /// - public ulong? ConstantValue { get; set; } - - /// - /// The source register (if Type is Register) - /// - public RegisterIndex? SourceRegister { get; set; } - - /// - /// The memory address or expression (if Type is Memory) - /// - public string? MemoryExpression { get; set; } - - /// - /// The instruction that defined this value - /// - public Instruction? DefiningInstruction { get; set; } - - /// - /// Returns a string representation of the value - /// - public override string ToString() - { - return Type switch - { - ValueType.Constant => $"0x{ConstantValue:X8}", - ValueType.Register => $"{SourceRegister}", - ValueType.Memory => $"[{MemoryExpression}]", - _ => "unknown" - }; - } - } - - /// - /// Analyzes data flow in the function and stores results in the analyzer context - /// - /// The analyzer context to store results in - public void AnalyzeDataFlow(AnalyzerContext context) - { - // Process each block in order - foreach (var block in context.Function.Blocks) - { - // Dictionary to track register values within this block - Dictionary registerValues = new(); - - // Process each instruction in the block - foreach (var instruction in block.Instructions) - { - // Process the instruction based on its type - ProcessInstruction(instruction, registerValues, context); - - // Store the current register state at this instruction's address - StoreRegisterState(instruction.Address, registerValues, context); - } - } - } - - /// - /// Processes an instruction to update register values - /// - /// The instruction to process - /// The current register values - /// The analyzer context - private void ProcessInstruction(Instruction instruction, Dictionary registerValues, AnalyzerContext context) - { - // Handle different instruction types - switch (instruction.Type) - { - // MOV instructions - case InstructionType.Mov: - ProcessMovInstruction(instruction, registerValues); - break; - - // XOR instructions - case InstructionType.Xor: - ProcessXorInstruction(instruction, registerValues); - break; - - // ADD instructions - case InstructionType.Add: - ProcessAddInstruction(instruction, registerValues); - break; - - // SUB instructions - case InstructionType.Sub: - ProcessSubInstruction(instruction, registerValues); - break; - - // PUSH/POP instructions can affect register values - case InstructionType.Pop: - ProcessPopInstruction(instruction, registerValues); - break; - - // Call instructions typically clobber certain registers - case InstructionType.Call: - ProcessCallInstruction(instruction, registerValues); - break; - - // Other instructions that modify registers - default: - // For now, mark destination registers as unknown for unsupported instructions - if (instruction.StructuredOperands.Count > 0 && - instruction.StructuredOperands[0] is RegisterOperand regOp) - { - registerValues[regOp.Register] = new ValueInfo - { - Type = ValueInfo.ValueType.Unknown, - DefiningInstruction = instruction - }; - } - - break; - } - } - - /// - /// Processes a MOV instruction to update register values - /// - private void ProcessMovInstruction(Instruction instruction, Dictionary registerValues) - { - // Handle different MOV variants - if (instruction.StructuredOperands.Count >= 2) - { - var dest = instruction.StructuredOperands[0]; - var src = instruction.StructuredOperands[1]; - - // MOV reg, imm - if (dest is RegisterOperand destReg && src is ImmediateOperand immSrc) - { - registerValues[destReg.Register] = new ValueInfo - { - Type = ValueInfo.ValueType.Constant, - ConstantValue = immSrc.Value, - DefiningInstruction = instruction - }; - } - // MOV reg, reg - else if (dest is RegisterOperand destReg2 && src is RegisterOperand srcReg) - { - if (registerValues.TryGetValue(srcReg.Register, out var srcValue)) - { - // Copy the source value - registerValues[destReg2.Register] = new ValueInfo - { - Type = srcValue.Type, - ConstantValue = srcValue.ConstantValue, - SourceRegister = srcValue.SourceRegister, - MemoryExpression = srcValue.MemoryExpression, - DefiningInstruction = instruction - }; - } - else - { - // Source register value is unknown - registerValues[destReg2.Register] = new ValueInfo - { - Type = ValueInfo.ValueType.Register, - SourceRegister = srcReg.Register, - DefiningInstruction = instruction - }; - } - } - // MOV reg, [mem] - else if (dest is RegisterOperand destReg3 && src is MemoryOperand memSrc) - { - registerValues[destReg3.Register] = new ValueInfo - { - Type = ValueInfo.ValueType.Memory, - MemoryExpression = memSrc.ToString(), - DefiningInstruction = instruction - }; - } - // MOV [mem], reg or MOV [mem], imm - // These don't update register values, so we don't need to handle them here - } - } - - /// - /// Processes an XOR instruction to update register values - /// - private void ProcessXorInstruction(Instruction instruction, Dictionary registerValues) - { - // Handle XOR reg, reg (often used for zeroing a register) - if (instruction.StructuredOperands.Count >= 2) - { - var dest = instruction.StructuredOperands[0]; - var src = instruction.StructuredOperands[1]; - - // XOR reg, same_reg (zeroing idiom) - if (dest is RegisterOperand destReg && src is RegisterOperand srcReg && - destReg.Register == srcReg.Register) - { - registerValues[destReg.Register] = new ValueInfo - { - Type = ValueInfo.ValueType.Constant, - ConstantValue = 0, - DefiningInstruction = instruction - }; - } - // Other XOR operations make the result unknown - else if (dest is RegisterOperand destReg2) - { - registerValues[destReg2.Register] = new ValueInfo - { - Type = ValueInfo.ValueType.Unknown, - DefiningInstruction = instruction - }; - } - } - } - - /// - /// Processes an ADD instruction to update register values - /// - private void ProcessAddInstruction(Instruction instruction, Dictionary registerValues) - { - // Handle ADD reg, imm where we know the register value - if (instruction.StructuredOperands.Count >= 2) - { - var dest = instruction.StructuredOperands[0]; - var src = instruction.StructuredOperands[1]; - - // ADD reg, imm where reg is a known constant - if (dest is RegisterOperand destReg && src is ImmediateOperand immSrc && - registerValues.TryGetValue(destReg.Register, out var destValue) && - destValue.Type == ValueInfo.ValueType.Constant && - destValue.ConstantValue.HasValue) - { - // Calculate the new constant value - registerValues[destReg.Register] = new ValueInfo - { - Type = ValueInfo.ValueType.Constant, - ConstantValue = (uint?) (destValue.ConstantValue.Value + immSrc.Value), - DefiningInstruction = instruction - }; - } - // Other ADD operations make the result unknown - else if (dest is RegisterOperand destReg2) - { - registerValues[destReg2.Register] = new ValueInfo - { - Type = ValueInfo.ValueType.Unknown, - DefiningInstruction = instruction - }; - } - } - } - - /// - /// Processes a SUB instruction to update register values - /// - private void ProcessSubInstruction(Instruction instruction, Dictionary registerValues) - { - // Handle SUB reg, imm where we know the register value - if (instruction.StructuredOperands.Count >= 2) - { - var dest = instruction.StructuredOperands[0]; - var src = instruction.StructuredOperands[1]; - - // SUB reg, imm where reg is a known constant - if (dest is RegisterOperand destReg && src is ImmediateOperand immSrc && - registerValues.TryGetValue(destReg.Register, out var destValue) && - destValue.Type == ValueInfo.ValueType.Constant && - destValue.ConstantValue.HasValue) - { - // Calculate the new constant value - registerValues[destReg.Register] = new ValueInfo - { - Type = ValueInfo.ValueType.Constant, - ConstantValue = (uint?) (destValue.ConstantValue.Value - immSrc.Value), - DefiningInstruction = instruction - }; - } - // Other SUB operations make the result unknown - else if (dest is RegisterOperand destReg2) - { - registerValues[destReg2.Register] = new ValueInfo - { - Type = ValueInfo.ValueType.Unknown, - DefiningInstruction = instruction - }; - } - } - } - - /// - /// Processes a POP instruction to update register values - /// - private void ProcessPopInstruction(Instruction instruction, Dictionary registerValues) - { - // POP reg makes the register value unknown (comes from stack) - if (instruction.StructuredOperands.Count >= 1 && - instruction.StructuredOperands[0] is RegisterOperand destReg) - { - registerValues[destReg.Register] = new ValueInfo - { - Type = ValueInfo.ValueType.Unknown, - DefiningInstruction = instruction - }; - } - } - - /// - /// Processes a CALL instruction to update register values - /// - private void ProcessCallInstruction(Instruction instruction, Dictionary registerValues) - { - // CALL instructions typically clobber EAX, ECX, and EDX in x86 calling conventions - registerValues[RegisterIndex.A] = new ValueInfo - { - Type = ValueInfo.ValueType.Unknown, - DefiningInstruction = instruction - }; - - registerValues[RegisterIndex.C] = new ValueInfo - { - Type = ValueInfo.ValueType.Unknown, - DefiningInstruction = instruction - }; - - registerValues[RegisterIndex.D] = new ValueInfo - { - Type = ValueInfo.ValueType.Unknown, - DefiningInstruction = instruction - }; - } - - /// - /// Stores the current register state at the given address - /// - private void StoreRegisterState(ulong address, Dictionary registerValues, AnalyzerContext context) - { - // Create a copy of the register values to store - var registerValuesCopy = new Dictionary(registerValues); - - // Store in the context - context.StoreAnalysisData(address, REGISTER_VALUE_KEY, registerValuesCopy); - } - - /// - /// Gets the register values at the given address - /// - public static Dictionary? GetRegisterValues(ulong address, AnalyzerContext context) - { - return context.GetAnalysisData>(address, REGISTER_VALUE_KEY); - } -} \ No newline at end of file diff --git a/X86Disassembler/Analysers/DecompilerEngine.cs b/X86Disassembler/Analysers/DecompilerEngine.cs deleted file mode 100644 index f6c3e01..0000000 --- a/X86Disassembler/Analysers/DecompilerEngine.cs +++ /dev/null @@ -1,149 +0,0 @@ -using X86Disassembler.Analysers.DecompilerTypes; -using X86Disassembler.PE; -using X86Disassembler.X86; - -namespace X86Disassembler.Analysers; - -/// -/// Main engine for decompiling x86 code -/// -public class DecompilerEngine -{ - /// - /// The PE file being analyzed - /// - private readonly PeFile _peFile; - - /// - /// Dictionary of analyzed functions by address - /// - private readonly Dictionary _functions = []; - - /// - /// Dictionary of exported function names by address - /// - private readonly Dictionary _exportedFunctions = []; - - /// - /// Creates a new decompiler engine for the specified PE file - /// - /// The PE file to decompile - public DecompilerEngine(PeFile peFile) - { - _peFile = peFile; - - // Initialize the exported functions dictionary - foreach (var export in peFile.ExportedFunctions) - { - _exportedFunctions[export.AddressRva] = export.Name; - } - } - - /// - /// Decompiles a function at the specified address - /// - /// The address of the function to decompile - /// The decompiled function - public Function DecompileFunction(ulong address) - { - // Check if we've already analyzed this function - if (_functions.TryGetValue(address, out var existingFunction)) - { - return existingFunction; - } - - // Find the code section containing this address - var codeSection = _peFile.SectionHeaders.Find(s => - s.ContainsCode() && - address >= s.VirtualAddress && - address < s.VirtualAddress + s.VirtualSize); - - if (codeSection == null) - { - throw new InvalidOperationException($"No code section found containing address 0x{address:X8}"); - } - - // Get the section data - int sectionIndex = _peFile.SectionHeaders.IndexOf(codeSection); - byte[] codeBytes = _peFile.GetSectionData(sectionIndex); - - // Create a disassembler for the code section - var disassembler = new BlockDisassembler(codeBytes, codeSection.VirtualAddress); - - // Disassemble the function - var asmFunction = disassembler.DisassembleFromAddress((uint)address); - - // Create an analyzer context - var context = new AnalyzerContext(asmFunction); - - // Run the analyzers - var loopAnalyzer = new LoopAnalyzer(); - loopAnalyzer.AnalyzeLoops(context); - - var dataFlowAnalyzer = new DataFlowAnalyzer(); - dataFlowAnalyzer.AnalyzeDataFlow(context); - - // Get the function name from exports if available - string functionName = _exportedFunctions.TryGetValue(address, out var name) - ? name - : $"func_{address:X8}"; - - // Analyze the function - var functionAnalyzer = new FunctionAnalyzer(context); - var function = functionAnalyzer.AnalyzeFunction(address, functionName); - - // Analyze control flow structures - var controlFlowAnalyzer = new ControlFlowAnalyzer(context); - controlFlowAnalyzer.AnalyzeControlFlow(function); - - - - // Store the function in our cache - _functions[address] = function; - - return function; - } - - /// - /// Generates C-like pseudocode for a decompiled function - /// - /// The function to generate pseudocode for - /// The generated pseudocode - public string GeneratePseudocode(Function function) - { - // Create a pseudocode generator - var generator = new PseudocodeGenerator(); - - // Generate the pseudocode - return generator.GeneratePseudocode(function); - } - - /// - /// Decompiles all exported functions in the PE file - /// - /// A dictionary of decompiled functions by address - public Dictionary DecompileAllExportedFunctions() - { - foreach (var export in _peFile.ExportedFunctions) - { - // Skip forwarded exports - if (export.IsForwarder) - { - continue; - } - - try - { - DecompileFunction(export.AddressRva); - } - catch (Exception ex) - { - Console.WriteLine($"Error decompiling function {export.Name} at 0x{export.AddressRva:X8}: {ex.Message}"); - } - } - - return _functions; - } -} - - diff --git a/X86Disassembler/Analysers/DecompilerTypes/CallingConvention.cs b/X86Disassembler/Analysers/DecompilerTypes/CallingConvention.cs deleted file mode 100644 index eed8bd8..0000000 --- a/X86Disassembler/Analysers/DecompilerTypes/CallingConvention.cs +++ /dev/null @@ -1,58 +0,0 @@ -namespace X86Disassembler.Analysers.DecompilerTypes; - -/// -/// Represents a calling convention used by a function -/// -public enum CallingConvention -{ - /// - /// C declaration calling convention (caller cleans the stack) - /// Parameters are pushed right-to-left - /// EAX, ECX, EDX are caller-saved - /// EBX, ESI, EDI, EBP are callee-saved - /// Return value in EAX (or EDX:EAX for 64-bit values) - /// - Cdecl, - - /// - /// Standard calling convention (callee cleans the stack) - /// Parameters are pushed right-to-left - /// EAX, ECX, EDX are caller-saved - /// EBX, ESI, EDI, EBP are callee-saved - /// Return value in EAX (or EDX:EAX for 64-bit values) - /// - Stdcall, - - /// - /// Fast calling convention - /// First two parameters in ECX and EDX, rest on stack right-to-left - /// EAX, ECX, EDX are caller-saved - /// EBX, ESI, EDI, EBP are callee-saved - /// Return value in EAX - /// Callee cleans the stack - /// - Fastcall, - - /// - /// This calling convention (C++ member functions) - /// 'this' pointer in ECX, other parameters pushed right-to-left - /// EAX, ECX, EDX are caller-saved - /// EBX, ESI, EDI, EBP are callee-saved - /// Return value in EAX - /// Caller cleans the stack - /// - Thiscall, - - /// - /// Microsoft vectorcall convention - /// First six parameters in registers (XMM0-XMM5 for floating point, ECX, EDX, R8, R9 for integers) - /// Additional parameters pushed right-to-left - /// Return value in EAX or XMM0 - /// - Vectorcall, - - /// - /// Unknown calling convention - /// - Unknown -} diff --git a/X86Disassembler/Analysers/DecompilerTypes/DataType.cs b/X86Disassembler/Analysers/DecompilerTypes/DataType.cs deleted file mode 100644 index 85b0d90..0000000 --- a/X86Disassembler/Analysers/DecompilerTypes/DataType.cs +++ /dev/null @@ -1,190 +0,0 @@ -namespace X86Disassembler.Analysers.DecompilerTypes; - -/// -/// Represents a data type in decompiled code -/// -public class DataType -{ - /// - /// The category of the data type - /// - public enum TypeCategory - { - /// - /// Unknown type - /// - Unknown, - - /// - /// Void type (no value) - /// - Void, - - /// - /// Integer type - /// - Integer, - - /// - /// Floating point type - /// - Float, - - /// - /// Pointer type - /// - Pointer, - - /// - /// Structure type - /// - Struct, - - /// - /// Array type - /// - Array, - - /// - /// Function type - /// - Function - } - - /// - /// The name of the type - /// - public string Name { get; set; } = string.Empty; - - /// - /// The category of the type - /// - public TypeCategory Category { get; set; } - - /// - /// The size of the type in bytes - /// - public int Size { get; set; } - - /// - /// Whether the type is signed (for integer types) - /// - public bool IsSigned { get; set; } - - /// - /// The pointed-to type (for pointer types) - /// - public DataType? PointedType { get; set; } - - /// - /// The element type (for array types) - /// - public DataType? ElementType { get; set; } - - /// - /// The number of elements (for array types) - /// - public int? ElementCount { get; set; } - - /// - /// The fields of the structure (for struct types) - /// - public List Fields { get; set; } = []; - - /// - /// Creates a new data type with the specified name and category - /// - /// The name of the type - /// The category of the type - /// The size of the type in bytes - public DataType(string name, TypeCategory category, int size) - { - Name = name; - Category = category; - Size = size; - } - - /// - /// Returns a string representation of the type - /// - public override string ToString() - { - return Name; - } - - /// - /// Creates a pointer type to the specified type - /// - /// The type being pointed to - /// A new pointer type - public static DataType CreatePointerType(DataType pointedType) - { - return new DataType($"{pointedType.Name}*", TypeCategory.Pointer, 4) - { - PointedType = pointedType - }; - } - - /// - /// Creates an array type of the specified element type and count - /// - /// The type of the array elements - /// The number of elements in the array - /// A new array type - public static DataType CreateArrayType(DataType elementType, int count) - { - return new DataType($"{elementType.Name}[{count}]", TypeCategory.Array, elementType.Size * count) - { - ElementType = elementType, - ElementCount = count - }; - } - - /// - /// Common predefined types - /// - public static readonly DataType Unknown = new DataType("unknown", TypeCategory.Unknown, 0); - public static readonly DataType Void = new DataType("void", TypeCategory.Void, 0); - public static readonly DataType Char = new DataType("char", TypeCategory.Integer, 1) { IsSigned = true }; - public static readonly DataType UChar = new DataType("unsigned char", TypeCategory.Integer, 1); - public static readonly DataType Short = new DataType("short", TypeCategory.Integer, 2) { IsSigned = true }; - public static readonly DataType UShort = new DataType("unsigned short", TypeCategory.Integer, 2); - public static readonly DataType Int = new DataType("int", TypeCategory.Integer, 4) { IsSigned = true }; - public static readonly DataType UInt = new DataType("unsigned int", TypeCategory.Integer, 4); - public static readonly DataType Float = new DataType("float", TypeCategory.Float, 4); - public static readonly DataType Double = new DataType("double", TypeCategory.Float, 8); -} - -/// -/// Represents a field in a structure -/// -public class StructField -{ - /// - /// The name of the field - /// - public string Name { get; set; } = string.Empty; - - /// - /// The type of the field - /// - public DataType Type { get; set; } = DataType.Unknown; - - /// - /// The offset of the field within the structure - /// - public int Offset { get; set; } - - /// - /// Creates a new structure field - /// - /// The name of the field - /// The type of the field - /// The offset of the field within the structure - public StructField(string name, DataType type, int offset) - { - Name = name; - Type = type; - Offset = offset; - } -} diff --git a/X86Disassembler/Analysers/DecompilerTypes/Function.cs b/X86Disassembler/Analysers/DecompilerTypes/Function.cs deleted file mode 100644 index 29af312..0000000 --- a/X86Disassembler/Analysers/DecompilerTypes/Function.cs +++ /dev/null @@ -1,98 +0,0 @@ -using X86Disassembler.X86; -using X86Disassembler.X86.Operands; - -namespace X86Disassembler.Analysers.DecompilerTypes; - -/// -/// Represents a function in decompiled code -/// -public class Function -{ - /// - /// The name of the function - /// - public string Name { get; set; } = string.Empty; - - /// - /// The address of the function - /// - public ulong Address { get; set; } - - /// - /// The return type of the function - /// - public DataType ReturnType { get; set; } = DataType.Void; - - /// - /// The parameters of the function - /// - public List Parameters { get; set; } = []; - - /// - /// Local variables in this function - /// - public List LocalVariables { get; } = []; - - /// - /// Variables stored in registers - /// - public List RegisterVariables { get; } = []; - - /// - /// The calling convention used by the function - /// - public CallingConvention CallingConvention { get; set; } = CallingConvention.Cdecl; - - /// - /// The assembly function representation - /// - public AsmFunction AsmFunction { get; set; } - - /// - /// Creates a new function with the specified name and address - /// - /// The name of the function - /// The address of the function - /// The assembly function representation - public Function(string name, ulong address, AsmFunction asmFunction) - { - Name = name; - Address = address; - AsmFunction = asmFunction; - } - - /// - /// Analyzes the function to identify variables - /// - public void AnalyzeVariables() - { - // Create a variable analyzer - var variableAnalyzer = new VariableAnalyzer(AsmFunction.Context); - - // Analyze stack variables - variableAnalyzer.AnalyzeStackVariables(this); - } - - - - - - - - /// - /// Returns a string representation of the function signature - /// - public string GetSignature() - { - string paramList = string.Join(", ", Parameters.Select(p => $"{p.Type} {p.Name}")); - return $"{ReturnType} {Name}({paramList})"; - } - - /// - /// Returns a string representation of the function - /// - public override string ToString() - { - return GetSignature(); - } -} diff --git a/X86Disassembler/Analysers/DecompilerTypes/Variable.cs b/X86Disassembler/Analysers/DecompilerTypes/Variable.cs deleted file mode 100644 index c428824..0000000 --- a/X86Disassembler/Analysers/DecompilerTypes/Variable.cs +++ /dev/null @@ -1,102 +0,0 @@ -namespace X86Disassembler.Analysers.DecompilerTypes; - -/// -/// Represents a variable in decompiled code -/// -public class Variable -{ - /// - /// The type of storage for a variable - /// - public enum StorageType - { - /// - /// Variable stored on the stack (local variable) - /// - Stack, - - /// - /// Variable stored in a register - /// - Register, - - /// - /// Variable stored in global memory - /// - Global, - - /// - /// Function parameter passed on the stack - /// - Parameter, - - /// - /// Function parameter passed in a register - /// - RegisterParameter - } - - /// - /// The name of the variable - /// - public string Name { get; set; } = string.Empty; - - /// - /// The type of the variable - /// - public DataType Type { get; set; } = DataType.Unknown; - - /// - /// The storage location of the variable - /// - public StorageType Storage { get; set; } - - /// - /// The offset from the base pointer (for stack variables) - /// - public int? StackOffset { get; set; } - - /// - /// The register that holds this variable (for register variables) - /// - public X86.RegisterIndex? Register { get; set; } - - /// - /// The memory address (for global variables) - /// - public ulong? Address { get; set; } - - /// - /// The size of the variable in bytes - /// - public int Size { get; set; } - - /// - /// Whether this variable is a function parameter - /// - public bool IsParameter { get; set; } - - /// - /// The parameter index (if this is a parameter) - /// - public int? ParameterIndex { get; set; } - - /// - /// Creates a new variable with the specified name and type - /// - /// The name of the variable - /// The type of the variable - public Variable(string name, DataType type) - { - Name = name; - Type = type; - } - - /// - /// Returns a string representation of the variable - /// - public override string ToString() - { - return $"{Type} {Name}"; - } -} diff --git a/X86Disassembler/Analysers/FileAbsoluteAddress.cs b/X86Disassembler/Analysers/FileAbsoluteAddress.cs new file mode 100644 index 0000000..c3fc01c --- /dev/null +++ b/X86Disassembler/Analysers/FileAbsoluteAddress.cs @@ -0,0 +1,56 @@ +namespace X86Disassembler.Analysers; + +public abstract class Address(ulong value, ulong imageBase) +{ + /// + /// The actual value of the address, not specifically typed. + /// + protected readonly ulong Value = value; + + /// + /// PE.ImageBase from which this address is constructed + /// + protected readonly ulong ImageBase = imageBase; +} + +/// +/// Absolute address in the PE file +/// +public class FileAbsoluteAddress(ulong value, ulong imageBase) : Address(value, imageBase) +{ + public ulong GetValue() + { + return Value; + } + + public virtual VirtualAddress AsImageBaseAddress() + { + return new VirtualAddress(Value + ImageBase, ImageBase); + } + + public virtual FileAbsoluteAddress AsFileAbsolute() + { + return this; + } +} + +/// +/// Address from PE.ImageBase +/// +public class VirtualAddress : FileAbsoluteAddress +{ + public VirtualAddress(ulong value, ulong imageBase) : base(value, imageBase) + { + } + + public override VirtualAddress AsImageBaseAddress() + { + return this; + } + + public override FileAbsoluteAddress AsFileAbsolute() + { + return new FileAbsoluteAddress(Value - ImageBase, ImageBase); + } +} + diff --git a/X86Disassembler/Analysers/FunctionAnalyzer.cs b/X86Disassembler/Analysers/FunctionAnalyzer.cs deleted file mode 100644 index fac2e1d..0000000 --- a/X86Disassembler/Analysers/FunctionAnalyzer.cs +++ /dev/null @@ -1,132 +0,0 @@ -using X86Disassembler.Analysers.DecompilerTypes; -using X86Disassembler.X86; -using X86Disassembler.X86.Operands; - -namespace X86Disassembler.Analysers; - -/// -/// Analyzes disassembled functions to identify variables, parameters, and control flow structures -/// -public class FunctionAnalyzer -{ - /// - /// The analyzer context - /// - private readonly AnalyzerContext _context; - - /// - /// Creates a new function analyzer - /// - /// The analyzer context - public FunctionAnalyzer(AnalyzerContext context) - { - _context = context; - } - - /// - /// Analyzes a function at the specified address - /// - /// The address of the function - /// The name of the function (if known) - /// The analyzed function - public Function AnalyzeFunction(ulong address, string name = "") - { - // If no name is provided, generate one based on the address - if (string.IsNullOrEmpty(name)) - { - name = $"func_{address:X8}"; - } - - // Create a function object - var function = new Function(name, address, _context.Function) - { - ReturnType = DataType.Unknown // Default to unknown return type - }; - - // Create a variable analyzer and analyze variables - var variableAnalyzer = new VariableAnalyzer(_context); - variableAnalyzer.AnalyzeStackVariables(function); - - // Determine the calling convention - DetermineCallingConvention(function); - - // Infer parameter and return types - InferTypes(function); - - return function; - } - - /// - /// Determines the calling convention of a function based on its behavior - /// - /// The function to analyze - private void DetermineCallingConvention(Function function) - { - // By default, we'll assume cdecl - function.CallingConvention = CallingConvention.Cdecl; - - // Get the exit blocks (blocks with ret instructions) - var exitBlocks = function.AsmFunction.Blocks.Where(b => - b.Instructions.Count > 0 && - b.Instructions.Last().Type == InstructionType.Ret).ToList(); - - // Check if the function cleans up its own stack - bool cleansOwnStack = false; - - // Look for ret instructions with an immediate operand - foreach (var block in function.AsmFunction.Blocks) - { - var lastInstruction = block.Instructions.LastOrDefault(); - if (lastInstruction != null && lastInstruction.Type == InstructionType.Ret) - { - // If the ret instruction has an immediate operand, it's cleaning its own stack - if (lastInstruction.StructuredOperands.Count > 0 && - lastInstruction.StructuredOperands[0] is ImmediateOperand immOp && - immOp.Value > 0) - { - cleansOwnStack = true; - break; - } - } - } - - // If the function cleans its own stack, it's likely stdcall - if (cleansOwnStack) - { - function.CallingConvention = CallingConvention.Stdcall; - - // Check for thiscall (ECX used for this pointer) - // This would require more sophisticated analysis of register usage - } - - // Check for fastcall (first two parameters in ECX and EDX) - // This would require more sophisticated analysis of register usage - } - - /// - /// Infers types for parameters and local variables based on their usage - /// - /// The function to analyze - private void InferTypes(Function function) - { - // This is a complex analysis that would require tracking how variables are used - // For now, we'll just set default types - - // Set return type based on register usage - function.ReturnType = DataType.Int; // Default to int - - // For each parameter, try to infer its type - foreach (var param in function.Parameters) - { - // Default to int for now - param.Type = DataType.Int; - } - - // For each local variable, try to infer its type - foreach (var localVar in function.LocalVariables) - { - // Default to int for now - localVar.Type = DataType.Int; - } - } -} diff --git a/X86Disassembler/Analysers/LoopAnalyzer.cs b/X86Disassembler/Analysers/LoopAnalyzer.cs deleted file mode 100644 index f68624e..0000000 --- a/X86Disassembler/Analysers/LoopAnalyzer.cs +++ /dev/null @@ -1,120 +0,0 @@ -namespace X86Disassembler.Analysers; - -/// -/// Analyzes the control flow graph to identify loops -/// -public class LoopAnalyzer -{ - /// - /// Identifies loops in the given function and stores them in the analyzer context - /// - /// The analyzer context to store results in - public void AnalyzeLoops(AnalyzerContext context) - { - // A back edge is an edge from a node to one of its dominators - // For our simplified approach, we'll identify back edges as edges that point to blocks - // with a lower address (potential loop headers) - foreach (var block in context.Function.Blocks) - { - foreach (var successor in block.Successors) - { - // If the successor has a lower address than the current block, - // it's potentially a back edge forming a loop - if (successor.Address < block.Address) - { - // Create a new loop with the identified back edge - var loop = new AnalyzerContext.Loop - { - Header = successor, - BackEdge = (block, successor) - }; - - // Find all blocks in the loop using a breadth-first search - FindLoopBlocks(loop); - - // Find the exit blocks of the loop - FindLoopExits(loop); - - // Store the loop in the context - context.LoopsByHeaderAddress[successor.Address] = loop; - - // Update the blocks-to-loops mapping - foreach (var loopBlock in loop.Blocks) - { - if (!context.LoopsByBlockAddress.TryGetValue(loopBlock.Address, out var loops)) - { - loops = []; - context.LoopsByBlockAddress[loopBlock.Address] = loops; - } - - loops.Add(loop); - } - } - } - } - } - - /// - /// Finds all blocks that are part of the loop - /// - /// The loop to analyze - private void FindLoopBlocks(AnalyzerContext.Loop loop) - { - // Start with the header block - loop.Blocks.Add(loop.Header); - - // Use a queue for breadth-first search - Queue queue = new Queue(); - queue.Enqueue(loop.BackEdge.From); // Start from the back edge source - - // Keep track of visited blocks to avoid cycles - HashSet visited = new HashSet { loop.Header.Address }; - - while (queue.Count > 0) - { - var block = queue.Dequeue(); - - // If we've already processed this block, skip it - if (!visited.Add(block.Address)) - { - continue; - } - - // Add the block to the loop - loop.Blocks.Add(block); - - // Add all predecessors to the queue (except those that would take us outside the loop) - foreach (var predecessor in block.Predecessors) - { - // Skip the header's predecessors that aren't in the loop already - // (to avoid including blocks outside the loop) - if (block == loop.Header && !loop.Blocks.Contains(predecessor) && predecessor != loop.BackEdge.From) - { - continue; - } - - queue.Enqueue(predecessor); - } - } - } - - /// - /// Finds all exit blocks of the loop (blocks that have successors outside the loop) - /// - /// The loop to analyze - private void FindLoopExits(AnalyzerContext.Loop loop) - { - foreach (var block in loop.Blocks) - { - foreach (var successor in block.Successors) - { - // If the successor is not part of the loop, this block is an exit - if (!loop.Blocks.Contains(successor)) - { - loop.ExitBlocks.Add(block); - break; // Once we've identified this block as an exit, we can stop checking its successors - } - } - } - } -} diff --git a/X86Disassembler/Analysers/PseudocodeGenerator.cs b/X86Disassembler/Analysers/PseudocodeGenerator.cs deleted file mode 100644 index 9e9b606..0000000 --- a/X86Disassembler/Analysers/PseudocodeGenerator.cs +++ /dev/null @@ -1,862 +0,0 @@ -using System.Text; -using X86Disassembler.Analysers.DecompilerTypes; -using X86Disassembler.X86; -using X86Disassembler.X86.Operands; - -namespace X86Disassembler.Analysers; - -/// -/// Generates C-like pseudocode from decompiled functions -/// -public class PseudocodeGenerator -{ - /// - /// Generates pseudocode for a decompiled function - /// - /// The function to generate pseudocode for - /// The generated pseudocode - public string GeneratePseudocode(Function function) - { - var result = new StringBuilder(); - - // Add function signature - result.AppendLine($"{function.ReturnType} {function.Name}({string.Join(", ", function.Parameters.Select(p => $"{p.Type} {p.Name}"))})") - .AppendLine("{"); - - // Add local variable declarations - foreach (var localVar in function.LocalVariables) - { - result.AppendLine($" {localVar.Type} {localVar.Name}; // Stack offset: {localVar.StackOffset}"); - } - - // Add register variable declarations - foreach (var regVar in function.RegisterVariables) - { - result.AppendLine($" {regVar.Type} {regVar.Name}; // Register: {RegisterMapper.GetRegisterName(regVar.Register!.Value, 32)}"); - } - - if (function.LocalVariables.Count > 0 || function.RegisterVariables.Count > 0) - { - result.AppendLine(); - } - - // Generate the function body using control flow analysis - GenerateFunctionBody(function, result, 1); - - // Add a return statement - result.AppendLine() - .AppendLine(" return 0; // Placeholder return value") - .AppendLine("}"); - - return result.ToString(); - } - - /// - /// Generates the body of the function using control flow analysis - /// - /// The function to generate code for - /// The string builder to append to - /// The current indentation level - private void GenerateFunctionBody(Function function, StringBuilder result, int indentLevel) - { - // Try to find the entry block - var entryBlock = function.AsmFunction.EntryBlock; - - // If the entry block is not found, try to find a block with an address that matches the function address minus the base address - if (entryBlock == null && function.AsmFunction.Blocks.Count > 0) - { - // Get the first block as a fallback - entryBlock = function.AsmFunction.Blocks[0]; - - // Log a warning but continue with the first block - result.AppendLine($"{new string(' ', indentLevel * 4)}// Warning: Entry block not found at address 0x{function.Address:X8}, using first block at 0x{entryBlock.Address:X8}"); - } - else if (entryBlock == null) - { - result.AppendLine($"{new string(' ', indentLevel * 4)}// Function body could not be decompiled - no blocks found"); - return; - } - - // Process blocks in order, starting from the entry block - var processedBlocks = new HashSet(); - GenerateBlockCode(function, entryBlock, result, indentLevel, processedBlocks); - } - - /// - /// Generates code for a basic block and its successors - /// - /// The function containing the block - /// The block to generate code for - /// The string builder to append to - /// The current indentation level - /// Set of blocks that have already been processed - private void GenerateBlockCode(Function function, InstructionBlock block, StringBuilder result, int indentLevel, HashSet processedBlocks) - { - // Check if we've already processed this block - if (processedBlocks.Contains(block.Address)) - { - return; - } - - // Mark this block as processed - processedBlocks.Add(block.Address); - - // Check if this block is part of a control flow structure - var context = function.AsmFunction.Context; - - // Check for if-else structure - var ifElseStructure = context.GetAnalysisData(block.Address, "IfElseStructure"); - if (ifElseStructure != null && ifElseStructure.ConditionBlock.Address == block.Address) - { - // This block is the condition of an if-else structure - GenerateIfElseCode(function, ifElseStructure, result, indentLevel, processedBlocks); - return; - } - - // Check for switch structure - var switchStructure = context.GetAnalysisData(block.Address, "SwitchStructure"); - if (switchStructure != null && switchStructure.HeaderBlock.Address == block.Address) - { - // This block is the header of a switch structure - GenerateSwitchCode(function, switchStructure, result, indentLevel, processedBlocks); - return; - } - - // Check if this block is part of a loop - var loops = context.LoopsByBlockAddress.TryGetValue(block.Address, out var blockLoops) ? blockLoops : null; - if (loops != null && loops.Count > 0) - { - // Get the innermost loop - var loop = loops[0]; - - // Check if this is the loop header - if (loop.Header.Address == block.Address) - { - // This block is the header of a loop - GenerateLoopCode(function, loop, result, indentLevel, processedBlocks); - return; - } - } - - // If we get here, this is a regular block - GenerateRegularBlockCode(function, block, result, indentLevel, processedBlocks); - } - - /// - /// Generates code for a regular basic block - /// - /// The function containing the block - /// The block to generate code for - /// The string builder to append to - /// The current indentation level - /// Set of blocks that have already been processed - private void GenerateRegularBlockCode(Function function, InstructionBlock block, StringBuilder result, int indentLevel, HashSet processedBlocks) - { - // Add a comment with the block address - result.AppendLine($"{new string(' ', indentLevel * 4)}// Block at 0x{block.Address:X8}"); - - // Check if this block ends with a conditional jump - bool hasConditionalJump = block.Instructions.Count > 0 && - IsConditionalJump(block.Instructions[^1].Type); - - // Add debug info about conditional jumps - if (hasConditionalJump) - { - var jumpInstruction = block.Instructions[^1]; - result.AppendLine($"{new string(' ', indentLevel * 4)}// DEBUG: Conditional jump {jumpInstruction} detected"); - - // Get the jump target address - ulong targetAddress = GetJumpTargetAddress(jumpInstruction); - result.AppendLine($"{new string(' ', indentLevel * 4)}// DEBUG: Jump target: 0x{targetAddress:X8}"); - - // Check if we can find a comparison instruction before the jump - Instruction? comparisonInstruction = null; - for (int i = block.Instructions.Count - 2; i >= 0 && i >= block.Instructions.Count - 5; i--) - { - var instruction = block.Instructions[i]; - if (instruction.Type == InstructionType.Cmp || instruction.Type == InstructionType.Test) - { - comparisonInstruction = instruction; - break; - } - } - - if (comparisonInstruction != null) - { - result.AppendLine($"{new string(' ', indentLevel * 4)}// DEBUG: Found comparison: {comparisonInstruction}"); - } - else - { - result.AppendLine($"{new string(' ', indentLevel * 4)}// DEBUG: No comparison instruction found"); - } - } - - // If this block has a conditional jump but wasn't detected as an if-else structure, - // we'll create an inline if statement for better readability - if (hasConditionalJump && block.Successors.Count == 2) - { - // Get the last instruction (conditional jump) - var jumpInstruction = block.Instructions[^1]; - - // Generate condition based on the jump type - string condition = GenerateConditionFromJump(jumpInstruction); - - // Generate code for all instructions except the last one (the jump) - for (int i = 0; i < block.Instructions.Count - 1; i++) - { - var instruction = block.Instructions[i]; - - // Skip prologue/epilogue instructions - if (IsPrologueOrEpilogueInstruction(instruction)) - { - continue; - } - - // Generate pseudocode for this instruction - var pseudocode = GenerateInstructionPseudocode(function, instruction); - if (!string.IsNullOrEmpty(pseudocode)) - { - result.AppendLine($"{new string(' ', indentLevel * 4)}{pseudocode}"); - } - else - { - // If we couldn't generate pseudocode, add the instruction as a comment - result.AppendLine($"{new string(' ', indentLevel * 4)}/* {instruction} */;"); - } - } - - // Generate the if statement - result.AppendLine($"{new string(' ', indentLevel * 4)}if ({condition})"); - result.AppendLine($"{new string(' ', indentLevel * 4)}{{"); - - // Find the target block (true branch) - var targetAddress = GetJumpTargetAddress(jumpInstruction); - var targetBlock = block.Successors.FirstOrDefault(s => s.Address == targetAddress); - - if (targetBlock != null) - { - // Generate code for the target block - GenerateBlockCode(function, targetBlock, result, indentLevel + 1, processedBlocks); - } - - result.AppendLine($"{new string(' ', indentLevel * 4)}}}"); - - // Find the fallthrough block (false branch) - var fallthroughBlock = block.Successors.FirstOrDefault(s => s.Address != targetAddress); - - if (fallthroughBlock != null && !processedBlocks.Contains(fallthroughBlock.Address)) - { - // Generate code for the fallthrough block - GenerateBlockCode(function, fallthroughBlock, result, indentLevel, processedBlocks); - } - } - else - { - // Regular block processing - // Generate code for each instruction in the block - foreach (var instruction in block.Instructions) - { - // Skip prologue/epilogue instructions - if (IsPrologueOrEpilogueInstruction(instruction)) - { - continue; - } - - // Generate pseudocode for this instruction - var pseudocode = GenerateInstructionPseudocode(function, instruction); - if (!string.IsNullOrEmpty(pseudocode)) - { - result.AppendLine($"{new string(' ', indentLevel * 4)}{pseudocode}"); - } - else - { - // If we couldn't generate pseudocode, add the instruction as a comment - result.AppendLine($"{new string(' ', indentLevel * 4)}/* {instruction} */;"); - } - } - - // Process successors in order - foreach (var successor in block.Successors) - { - // Only process successors that haven't been processed yet - if (!processedBlocks.Contains(successor.Address)) - { - GenerateBlockCode(function, successor, result, indentLevel, processedBlocks); - } - } - } - } - - /// - /// Generates code for an if-else structure - /// - /// The function containing the structure - /// The if-else structure to generate code for - /// The string builder to append to - /// The current indentation level - /// Set of blocks that have already been processed - private void GenerateIfElseCode(Function function, ControlFlowAnalyzer.IfElseStructure ifElseStructure, StringBuilder result, int indentLevel, HashSet processedBlocks) - { - // Mark the condition block as processed - processedBlocks.Add(ifElseStructure.ConditionBlock.Address); - - // Generate the condition expression - string condition = GenerateConditionExpression(function, ifElseStructure.ConditionBlock); - - // Add the if statement - string indent = new string(' ', indentLevel * 4); - result.AppendLine($"{indent}if ({condition})"); - result.AppendLine($"{indent}{{"); - - // Check if the 'then' branch contains a nested if-else structure - if (ifElseStructure.NestedThenStructure != null) - { - // Generate code for the nested if-else structure in the 'then' branch - GenerateIfElseCode(function, ifElseStructure.NestedThenStructure, result, indentLevel + 1, processedBlocks); - } - else - { - // Generate code for the 'then' branch normally - GenerateBlockCode(function, ifElseStructure.ThenBlock, result, indentLevel + 1, processedBlocks); - } - - // Close the 'then' branch - result.AppendLine($"{indent}}}"); - - // Add the 'else' branch if it exists and is not already processed - if (ifElseStructure.ElseBlock != null && !processedBlocks.Contains(ifElseStructure.ElseBlock.Address)) - { - result.AppendLine($"{indent}else"); - result.AppendLine($"{indent}{{"); - - // Check if the 'else' branch contains a nested if-else structure (else-if) - if (ifElseStructure.NestedElseStructure != null) - { - // Generate code for the nested if-else structure in the 'else' branch - GenerateIfElseCode(function, ifElseStructure.NestedElseStructure, result, indentLevel + 1, processedBlocks); - } - else - { - // Generate code for the 'else' branch normally - GenerateBlockCode(function, ifElseStructure.ElseBlock, result, indentLevel + 1, processedBlocks); - } - - // Close the 'else' branch - result.AppendLine($"{indent}}}"); - } - - // If this is a complete if-else structure with a merge point, and the merge point hasn't been processed yet - if (ifElseStructure.IsComplete && ifElseStructure.MergeBlock != null && - !processedBlocks.Contains(ifElseStructure.MergeBlock.Address)) - { - // Generate code for the merge block - GenerateBlockCode(function, ifElseStructure.MergeBlock, result, indentLevel, processedBlocks); - } - } - - /// - /// Generates code for a switch structure - /// - /// The function containing the structure - /// The switch structure to generate code for - /// The string builder to append to - /// The current indentation level - /// Set of blocks that have already been processed - private void GenerateSwitchCode(Function function, ControlFlowAnalyzer.SwitchStructure switchStructure, StringBuilder result, int indentLevel, HashSet processedBlocks) - { - // Mark the header block as processed - processedBlocks.Add(switchStructure.HeaderBlock.Address); - - // Generate the switch expression - string switchExpr = "/* switch expression */"; - - // Add the switch statement - string indent = new string(' ', indentLevel * 4); - result.AppendLine($"{indent}// Switch structure at 0x{switchStructure.HeaderBlock.Address:X8}") - .AppendLine($"{indent}switch ({switchExpr})"); - - // Add the switch body - result.AppendLine($"{indent}{{") - .AppendLine(); - - // Generate code for each case - foreach (var switchCase in switchStructure.Cases) - { - // Add the case label - result.AppendLine($"{indent} case {switchCase.Value}:") - .AppendLine($"{indent} // Case block at 0x{switchCase.CaseBlock.Address:X8}"); - - // Generate code for the case block - GenerateBlockCode(function, switchCase.CaseBlock, result, indentLevel + 2, processedBlocks); - - // Add a break statement - result.AppendLine($"{indent} break;") - .AppendLine(); - } - - // Add a default case - result.AppendLine($"{indent} default:") - .AppendLine($"{indent} // Default case") - .AppendLine($"{indent} break;"); - - // Close the switch body - result.AppendLine($"{indent}}}"); - } - - /// - /// Generates code for a loop structure - /// - /// The function containing the structure - /// The loop to generate code for - /// The string builder to append to - /// The current indentation level - /// Set of blocks that have already been processed - private void GenerateLoopCode(Function function, AnalyzerContext.Loop loop, StringBuilder result, int indentLevel, HashSet processedBlocks) - { - // Mark the header block as processed - processedBlocks.Add(loop.Header.Address); - - // Add the loop header - string indent = new string(' ', indentLevel * 4); - result.AppendLine($"{indent}// Loop at 0x{loop.Header.Address:X8}") - .AppendLine($"{indent}while (true) // Simplified loop condition"); - - // Add the loop body - result.AppendLine($"{indent}{{") - .AppendLine($"{indent} // Loop body"); - - // Generate code for the loop body (starting with the header) - GenerateBlockCode(function, loop.Header, result, indentLevel + 1, processedBlocks); - - // Close the loop body - result.AppendLine($"{indent}}}"); - } - - /// - /// Generates a condition expression for an if statement - /// - /// The function containing the block - /// The block containing the condition - /// A string representing the condition expression - private string GenerateConditionExpression(Function function, InstructionBlock conditionBlock) - { - // If the block is empty, return a placeholder - if (conditionBlock.Instructions.Count == 0) - { - return "condition"; - } - - // Get the last instruction (should be a conditional jump) - var lastInstruction = conditionBlock.Instructions[^1]; - - // If it's not a conditional jump, return a placeholder - if (!IsConditionalJump(lastInstruction.Type)) - { - return "condition"; - } - - // Look for a CMP or TEST instruction that sets the flags for this jump - Instruction? comparisonInstruction = null; - - // Search backwards from the jump instruction to find a comparison - for (int i = conditionBlock.Instructions.Count - 2; i >= 0; i--) - { - var instruction = conditionBlock.Instructions[i]; - if (instruction.Type == InstructionType.Cmp || instruction.Type == InstructionType.Test) - { - comparisonInstruction = instruction; - break; - } - } - - // If we found a comparison instruction, generate a condition based on it and the jump - if (comparisonInstruction != null && comparisonInstruction.StructuredOperands.Count >= 2) - { - var left = FormatOperand(comparisonInstruction.StructuredOperands[0]); - var right = FormatOperand(comparisonInstruction.StructuredOperands[1]); - - // Generate condition based on jump type - return GenerateConditionFromJump(lastInstruction, left, right); - } - - // If we couldn't find a comparison instruction, just use the jump condition - return GenerateConditionFromJump(lastInstruction, null, null); - } - - /// - /// Generates pseudocode for a single instruction - /// - /// The function containing the instruction - /// The instruction to generate pseudocode for - /// The generated pseudocode - private string GenerateInstructionPseudocode(Function function, Instruction instruction) - { - // Check for special cases first - if (instruction.Type == InstructionType.Xor && instruction.StructuredOperands.Count >= 2) - { - var dest = instruction.StructuredOperands[0]; - var src = instruction.StructuredOperands[1]; - - // Check for XOR with self (zeroing a register) - if (dest is RegisterOperand regDest && src is RegisterOperand regSrc && - regDest.Register == regSrc.Register) - { - // This is a common idiom to zero a register - return $"{FormatOperand(dest)} = 0; // XOR with self to zero register"; - } - } - - // Handle different instruction types - switch (instruction.Type) - { - case InstructionType.Mov: - // Handle MOV instruction - if (instruction.StructuredOperands.Count >= 2) - { - var dest = instruction.StructuredOperands[0]; - var src = instruction.StructuredOperands[1]; - - // Special case for moving 0 (common initialization pattern) - if (src is ImmediateOperand immSrc && immSrc.Value == 0) - { - return $"{FormatOperand(dest)} = 0; // Initialize to zero"; - } - - return $"{FormatOperand(dest)} = {FormatOperand(src)};"; - } - break; - - case InstructionType.Add: - // Handle ADD instruction - if (instruction.StructuredOperands.Count >= 2) - { - var dest = instruction.StructuredOperands[0]; - var src = instruction.StructuredOperands[1]; - - // Special case for adding 1 (increment) - if (src is ImmediateOperand immSrc && immSrc.Value == 1) - { - return $"{FormatOperand(dest)}++; // Increment"; - } - - return $"{FormatOperand(dest)} += {FormatOperand(src)};"; - } - break; - - case InstructionType.Sub: - // Handle SUB instruction - if (instruction.StructuredOperands.Count >= 2) - { - var dest = instruction.StructuredOperands[0]; - var src = instruction.StructuredOperands[1]; - - // Special case for subtracting 1 (decrement) - if (src is ImmediateOperand immSrc && immSrc.Value == 1) - { - return $"{FormatOperand(dest)}--; // Decrement"; - } - - return $"{FormatOperand(dest)} -= {FormatOperand(src)};"; - } - break; - - case InstructionType.And: - // Handle AND instruction - if (instruction.StructuredOperands.Count >= 2) - { - var dest = instruction.StructuredOperands[0]; - var src = instruction.StructuredOperands[1]; - - return $"{FormatOperand(dest)} &= {FormatOperand(src)};"; - } - break; - - case InstructionType.Or: - // Handle OR instruction - if (instruction.StructuredOperands.Count >= 2) - { - var dest = instruction.StructuredOperands[0]; - var src = instruction.StructuredOperands[1]; - - return $"{FormatOperand(dest)} |= {FormatOperand(src)};"; - } - break; - - case InstructionType.Xor: - // Handle XOR instruction - if (instruction.StructuredOperands.Count >= 2) - { - var dest = instruction.StructuredOperands[0]; - var src = instruction.StructuredOperands[1]; - - // We already handled the special case of XOR with self above - return $"{FormatOperand(dest)} ^= {FormatOperand(src)};"; - } - break; - - case InstructionType.Test: - // Handle TEST instruction (no assignment, just sets flags) - if (instruction.StructuredOperands.Count >= 2) - { - var left = instruction.StructuredOperands[0]; - var right = instruction.StructuredOperands[1]; - - // Special case for TEST with self (checking if a register is zero) - if (left is RegisterOperand regLeft && right is RegisterOperand regRight && - regLeft.Register == regRight.Register) - { - return $"// Check if {FormatOperand(left)} is zero"; - } - - return $"// Test {FormatOperand(left)} & {FormatOperand(right)}"; - } - break; - - case InstructionType.Cmp: - // Handle CMP instruction (no assignment, just sets flags) - if (instruction.StructuredOperands.Count >= 2) - { - var left = instruction.StructuredOperands[0]; - var right = instruction.StructuredOperands[1]; - - // For CMP, we'll return a comment that explains what's being compared - // This will help with understanding the following conditional jumps - return $"// Compare {FormatOperand(left)} with {FormatOperand(right)}"; - } - break; - - case InstructionType.Call: - // Handle CALL instruction - if (instruction.StructuredOperands.Count >= 1) - { - var target = instruction.StructuredOperands[0]; - - // For function calls, we'll generate a proper function call expression - return $"{FormatOperand(target)}(); // Function call"; - } - break; - - case InstructionType.Ret: - // Handle RET instruction - return "return 0; // Placeholder return value"; - - case InstructionType.Push: - // Handle PUSH instruction - if (instruction.StructuredOperands.Count >= 1) - { - var src = instruction.StructuredOperands[0]; - return $"// Push {FormatOperand(src)} onto stack"; - } - break; - - case InstructionType.Pop: - // Handle POP instruction - if (instruction.StructuredOperands.Count >= 1) - { - var dest = instruction.StructuredOperands[0]; - return $"{FormatOperand(dest)} = pop(); // Pop from stack"; - } - break; - - case InstructionType.Inc: - // Handle INC instruction - if (instruction.StructuredOperands.Count >= 1) - { - var dest = instruction.StructuredOperands[0]; - return $"{FormatOperand(dest)}++;"; - } - break; - - case InstructionType.Dec: - // Handle DEC instruction - if (instruction.StructuredOperands.Count >= 1) - { - var dest = instruction.StructuredOperands[0]; - return $"{FormatOperand(dest)}--;"; - } - break; - - case InstructionType.Shl: - // Handle SHL/SAL instruction (shift left) - if (instruction.StructuredOperands.Count >= 2) - { - var dest = instruction.StructuredOperands[0]; - var count = instruction.StructuredOperands[1]; - return $"{FormatOperand(dest)} <<= {FormatOperand(count)};"; - } - break; - - case InstructionType.Shr: - // Handle SHR instruction (shift right logical) - if (instruction.StructuredOperands.Count >= 2) - { - var dest = instruction.StructuredOperands[0]; - var count = instruction.StructuredOperands[1]; - return $"{FormatOperand(dest)} >>>= {FormatOperand(count)}; // Logical shift right"; - } - break; - - case InstructionType.Sar: - // Handle SAR instruction (shift right arithmetic) - if (instruction.StructuredOperands.Count >= 2) - { - var dest = instruction.StructuredOperands[0]; - var count = instruction.StructuredOperands[1]; - return $"{FormatOperand(dest)} >>= {FormatOperand(count)}; // Arithmetic shift right"; - } - break; - - default: - // For other instructions, just add a comment - return $"// {instruction}"; - } - - return string.Empty; - } - - /// - /// Formats an operand for display in pseudocode - /// - /// The operand to format - /// A string representation of the operand - private string FormatOperand(Operand operand) - { - if (operand is RegisterOperand regOp) - { - // Format register operand - return RegisterMapper.GetRegisterName(regOp.Register, 32); - } - else if (operand is ImmediateOperand immOp) - { - // Format immediate operand - return $"0x{immOp.Value:X}"; - } - else if (operand is DisplacementMemoryOperand dispOp) - { - // Format displacement memory operand - string baseReg = RegisterMapper.GetRegisterName(dispOp.BaseRegister, 32); - return $"*({baseReg} + 0x{dispOp.Displacement:X})"; - } - else if (operand is BaseRegisterMemoryOperand baseOp) - { - // Format base register memory operand - string baseReg = RegisterMapper.GetRegisterName(baseOp.BaseRegister, 32); - return $"*({baseReg})"; - } - - // Default formatting - return operand.ToString(); - } - - /// - /// Checks if an instruction is part of the function prologue or epilogue - /// - /// The instruction to check - /// True if the instruction is part of the prologue or epilogue, false otherwise - private bool IsPrologueOrEpilogueInstruction(Instruction instruction) - { - // Check for common prologue/epilogue instructions - if (instruction.Type == InstructionType.Push && - instruction.StructuredOperands.Count > 0 && - instruction.StructuredOperands[0] is RegisterOperand reg && - reg.Register == RegisterIndex.Bp) - { - return true; // push ebp - } - - if (instruction.Type == InstructionType.Mov && - instruction.StructuredOperands.Count > 1 && - instruction.StructuredOperands[0] is RegisterOperand destReg && - instruction.StructuredOperands[1] is RegisterOperand srcReg && - destReg.Register == RegisterIndex.Bp && - srcReg.Register == RegisterIndex.Sp) - { - return true; // mov ebp, esp - } - - if (instruction.Type == InstructionType.Pop && - instruction.StructuredOperands.Count > 0 && - instruction.StructuredOperands[0] is RegisterOperand popReg && - popReg.Register == RegisterIndex.Bp) - { - return true; // pop ebp - } - - if (instruction.Type == InstructionType.Ret) - { - return true; // ret - } - - return false; - } - - /// - /// Checks if the given instruction type is a conditional jump - /// - /// The instruction type - /// True if the instruction is a conditional jump, false otherwise - private bool IsConditionalJump(InstructionType type) - { - // Check for common conditional jumps - return type == InstructionType.Jz || - type == InstructionType.Jnz || - type == InstructionType.Jg || - type == InstructionType.Jge || - type == InstructionType.Jl || - type == InstructionType.Jle || - type == InstructionType.Ja || - type == InstructionType.Jae || - type == InstructionType.Jb || - type == InstructionType.Jbe || - type == InstructionType.Jo || - type == InstructionType.Jno || - type == InstructionType.Js || - type == InstructionType.Jns; - } - - /// - /// Gets the target address of a jump instruction - /// - /// The jump instruction - /// The target address of the jump - private ulong GetJumpTargetAddress(Instruction instruction) - { - // Jump instructions have the target address as their first operand - if (instruction.StructuredOperands.Count > 0) - { - return instruction.StructuredOperands[0].GetValue(); - } - - // If we can't determine the target address, return 0 - return 0; - } - - /// - /// Generates a condition expression based on a conditional jump instruction - /// - /// The conditional jump instruction - /// The left operand of the comparison, if available - /// The right operand of the comparison, if available - /// A string representing the condition expression - private string GenerateConditionFromJump(Instruction instruction, string? left = null, string? right = null) - { - // If we don't have comparison operands, use a generic condition - if (left == null || right == null) - { - switch (instruction.Type) - { - case InstructionType.Jz: return "zero flag is set"; - case InstructionType.Jnz: return "zero flag is not set"; - default: return "condition"; - } - } - - // If we have comparison operands, generate a more specific condition - switch (instruction.Type) - { - case InstructionType.Jz: return $"{left} == 0"; - case InstructionType.Jnz: return $"{left} != 0"; - default: return $"{left} ? {right}"; - } - } -} diff --git a/X86Disassembler/Analysers/VariableAnalyzer.cs b/X86Disassembler/Analysers/VariableAnalyzer.cs deleted file mode 100644 index 8f9749d..0000000 --- a/X86Disassembler/Analysers/VariableAnalyzer.cs +++ /dev/null @@ -1,252 +0,0 @@ -using X86Disassembler.Analysers.DecompilerTypes; -using X86Disassembler.X86; -using X86Disassembler.X86.Operands; - -namespace X86Disassembler.Analysers; - -/// -/// Analyzes disassembled code to identify and track variables -/// -public class VariableAnalyzer -{ - /// - /// The analyzer context - /// - private readonly AnalyzerContext _context; - - /// - /// Creates a new variable analyzer - /// - /// The analyzer context - public VariableAnalyzer(AnalyzerContext context) - { - _context = context; - } - - /// - /// Analyzes the function to identify stack variables - /// - /// The function to analyze - public void AnalyzeStackVariables(Function function) - { - // Dictionary to track stack offsets and their corresponding variables - var stackOffsets = new Dictionary(); - - // First, identify the function prologue to determine stack frame setup - bool hasPushEbp = false; - bool hasMovEbpEsp = false; - int localSize = 0; - - // Look for the function prologue pattern: push ebp; mov ebp, esp; sub esp, X - foreach (var block in function.AsmFunction.Blocks) - { - foreach (var instruction in block.Instructions) - { - // Look for push ebp - if (instruction.Type == InstructionType.Push && - instruction.StructuredOperands.Count > 0 && - instruction.StructuredOperands[0] is RegisterOperand regOp && - regOp.Register == RegisterIndex.Bp) - { - hasPushEbp = true; - continue; - } - - // Look for mov ebp, esp - if (instruction.Type == InstructionType.Mov && - instruction.StructuredOperands.Count > 1 && - instruction.StructuredOperands[0] is RegisterOperand destReg && - instruction.StructuredOperands[1] is RegisterOperand srcReg && - destReg.Register == RegisterIndex.Bp && - srcReg.Register == RegisterIndex.Sp) - { - hasMovEbpEsp = true; - continue; - } - - // Look for sub esp, X to determine local variable space - if (instruction.Type == InstructionType.Sub && - instruction.StructuredOperands.Count > 1 && - instruction.StructuredOperands[0] is RegisterOperand subReg && - instruction.StructuredOperands[1] is ImmediateOperand immOp && - subReg.Register == RegisterIndex.Sp) - { - localSize = (int)immOp.Value; - break; - } - } - - // If we found the complete prologue, no need to check more blocks - if (hasPushEbp && hasMovEbpEsp && localSize > 0) - { - break; - } - } - - // If we didn't find a standard prologue, we can't reliably analyze stack variables - if (!hasPushEbp || !hasMovEbpEsp) - { - return; - } - - // Now scan for memory accesses relative to EBP - foreach (var block in function.AsmFunction.Blocks) - { - foreach (var instruction in block.Instructions) - { - // Look for memory operands that reference [ebp+X] or [ebp-X] - foreach (var operand in instruction.StructuredOperands) - { - if (operand is DisplacementMemoryOperand memOp && - memOp.BaseRegister == RegisterIndex.Bp) - { - // This is accessing memory relative to EBP - int offset = (int)memOp.Displacement; - - // Determine if this is a parameter or local variable - if (offset > 0 && offset < 1000) // Positive offset = parameter (with reasonable limit) - { - // Parameters start at [ebp+8] (return address at [ebp+4], saved ebp at [ebp+0]) - int paramIndex = (offset - 8) / 4; // Assuming 4-byte parameters - - // Make sure we have enough parameters in the function - while (function.Parameters.Count <= paramIndex) - { - var param = new Variable($"param_{function.Parameters.Count + 1}", DataType.Unknown) - { - Storage = Variable.StorageType.Parameter, - StackOffset = 8 + (function.Parameters.Count * 4), - IsParameter = true, - ParameterIndex = function.Parameters.Count, - Size = 4 // Assume 4 bytes (32-bit) - }; - function.Parameters.Add(param); - } - } - else if (offset < 0 && offset > -1000) // Negative offset = local variable (with reasonable limit) - { - // Check if we've already seen this offset - if (!stackOffsets.TryGetValue(offset, out var variable)) - { - // Create a new local variable - variable = new Variable($"local_{Math.Abs(offset)}", DataType.Unknown) - { - Storage = Variable.StorageType.Stack, - StackOffset = offset, - Size = 4 // Assume 4 bytes (32-bit) - }; - - // Add to our tracking dictionaries - stackOffsets[offset] = variable; - function.LocalVariables.Add(variable); - } - - // Track the usage of this variable - TrackVariableUsage(variable, instruction); - } - } - } - } - } - - // Analyze register-based variables - AnalyzeRegisterVariables(function); - } - - /// - /// Analyzes register usage to identify variables stored in registers - /// - /// The function to analyze - private void AnalyzeRegisterVariables(Function function) - { - // This is a more complex analysis that would track register values across blocks - // For now, we'll focus on identifying registers that hold consistent values - - // Dictionary to track register variables - var registerVariables = new Dictionary(); - - // For each block, analyze register usage - foreach (var block in function.AsmFunction.Blocks) - { - // Check if we have register values for this block from data flow analysis - var registerValuesKey = "RegisterValues"; - if (_context.GetAnalysisData>(block.Address, registerValuesKey) is Dictionary registerValues) - { - foreach (var kvp in registerValues) - { - var register = kvp.Key; - var valueInfo = kvp.Value; - - // Skip special registers like ESP and EBP - if (register == RegisterIndex.Sp || register == RegisterIndex.Bp) - { - continue; - } - - // If the register holds a constant value, it might be a variable - if (valueInfo.Type == DataFlowAnalyzer.ValueInfo.ValueType.Constant) - { - // Check if we already have a variable for this register - if (!registerVariables.TryGetValue(register, out var variable)) - { - // Create a new register variable - variable = new Variable($"reg_{RegisterMapper.GetRegisterName(register, 32)}", DataType.Unknown) - { - Storage = Variable.StorageType.Register, - Register = register, - Size = 4 // Assume 4 bytes (32-bit) - }; - - // Add to our tracking dictionary - registerVariables[register] = variable; - function.RegisterVariables.Add(variable); - } - } - } - } - } - } - - /// - /// Tracks how a variable is used in an instruction - /// - /// The variable to track - /// The instruction using the variable - private void TrackVariableUsage(Variable variable, Instruction instruction) - { - // For now, we'll just try to infer the variable type based on its usage - - // If the variable is used in a comparison with 0, it might be a boolean - if (instruction.Type == InstructionType.Cmp || instruction.Type == InstructionType.Test) - { - if (instruction.StructuredOperands.Count > 1 && - instruction.StructuredOperands[1] is ImmediateOperand immOp && - immOp.Value == 0) - { - // This might be a boolean check - if (variable.Type == DataType.Unknown) - { - // Set to int for now as we don't have a bool type - variable.Type = DataType.Int; - } - } - } - - // If the variable is used with string instructions, it might be a string - // Check for string operations - we don't have specific string instruction types yet - // Skip string detection for now as we don't have the specific instruction types - // We'll detect strings through other means later - - // If the variable is used with floating-point instructions, it might be a float - // Check for floating-point operations - if (instruction.Type == InstructionType.Fld || - instruction.Type == InstructionType.Fst || - instruction.Type == InstructionType.Fstp) - { - if (variable.Type == DataType.Unknown) - { - variable.Type = DataType.Float; - } - } - } -} diff --git a/X86Disassembler/Program.cs b/X86Disassembler/Program.cs index 2fa363d..e321f0d 100644 --- a/X86Disassembler/Program.cs +++ b/X86Disassembler/Program.cs @@ -1,5 +1,6 @@ using X86Disassembler.Analysers; using X86Disassembler.PE; +using X86Disassembler.ProjectSystem; using X86Disassembler.X86; namespace X86Disassembler; @@ -11,7 +12,7 @@ public class Program { // Hardcoded file path for testing private const string FilePath = @"C:\Program Files (x86)\Nikita\Iron Strategy\Terrain.dll"; - + /// /// Main entry point /// @@ -20,18 +21,18 @@ public class Program { Console.WriteLine("X86 Disassembler and Decompiler"); Console.WriteLine("--------------------------------"); - + // Load the file Console.WriteLine($"Loading file: {FilePath}"); byte[] fileBytes = File.ReadAllBytes(FilePath); Console.WriteLine($"Successfully loaded {FilePath}"); Console.WriteLine($"File size: {fileBytes.Length} bytes\n"); - + // Parse the PE format Console.WriteLine("Parsing PE format...\n"); PeFile peFile = new PeFile(fileBytes); peFile.Parse(); - + // Print PE file information Console.WriteLine("PE File Information:"); Console.WriteLine($"Architecture: {(peFile.OptionalHeader.Is64Bit() ? "64-bit" : "32-bit")}"); @@ -48,7 +49,17 @@ public class Program // Print import information PrintPeImports(peFile); - + + var projectPeFile = new ProjectPeFile() + { + ImageBase = new VirtualAddress(0, peFile.OptionalHeader.ImageBase), + Architecture = peFile.OptionalHeader.Is64Bit() + ? "64-bit" + : "32-bit", + Name = Path.GetFileName(FilePath), + EntryPointAddress = new FileAbsoluteAddress(peFile.OptionalHeader.AddressOfEntryPoint, peFile.OptionalHeader.ImageBase) + }; + // Find code sections var codeSections = peFile.SectionHeaders.FindAll(s => s.ContainsCode()); Console.WriteLine($"Found {codeSections.Count} code section(s):"); @@ -56,74 +67,34 @@ public class Program { Console.WriteLine($" - {section.Name}: Size={section.VirtualSize} bytes, RVA=0x{section.VirtualAddress:X8}"); } + Console.WriteLine(); - + + var projectPeFileSections = peFile.SectionHeaders.Select( + x => new ProjectPeFileSection() + { + Name = x.Name, + Flags = (x.ContainsCode() ? SectionFlags.Code : SectionFlags.None) | + (x.IsReadable() ? SectionFlags.Read : SectionFlags.None) | + (x.IsWritable() ? SectionFlags.Write : SectionFlags.None) | + (x.IsExecutable() ? SectionFlags.Exec : SectionFlags.None) , + VirtualAddress = new VirtualAddress(x.VirtualAddress, peFile.OptionalHeader.ImageBase), + Size = x.VirtualSize + } + ).ToList(); + // Disassemble the first code section if (codeSections.Count > 0) { var section = codeSections[0]; byte[] codeBytes = peFile.GetSectionData(peFile.SectionHeaders.IndexOf(section)); - - // // First demonstrate sequential disassembly - // Console.WriteLine($"Sequential disassembly of section {section.Name} at RVA 0x{section.VirtualAddress:X8}:"); - // - // // Create a disassembler for the code section - // // Base address should be the section's virtual address, not the image base + VA - // Disassembler disassembler = new Disassembler(codeBytes, section.VirtualAddress); - // - // // Disassemble sequentially (linear approach) - // var linearInstructions = disassembler.Disassemble(); - // - // // Print the first 30 instructions from linear disassembly - // int linearCount = Math.Min(30, linearInstructions.Count); - // for (int i = 0; i < linearCount; i++) - // { - // Console.WriteLine(linearInstructions[i]); - // } - // - // disassemble entry point - var disassembler = new BlockDisassembler(codeBytes, section.VirtualAddress); - - var asmFunction = disassembler.DisassembleFromAddress(peFile.OptionalHeader.AddressOfEntryPoint); - - // Run all analyzers on the function - asmFunction.Analyze(); - - // Create a decompiler engine - var decompiler = new DecompilerEngine(peFile); - - try - { - // Find a suitable exported function to decompile - // Let's try to find a function that might have more complex control flow - var exportedFunctions = peFile.ExportedFunctions; - - // Print all exported functions to help us choose a better one - Console.WriteLine("Available exported functions:"); - foreach (var func in exportedFunctions) - { - Console.WriteLine($" - {func.Name} (RVA=0x{func.AddressRva:X8})"); - } - - // Decompile the entry point function - Console.WriteLine($"\nDecompiling entry point function at address 0x{peFile.OptionalHeader.AddressOfEntryPoint:X8}\n"); - - // Decompile the entry point function - var function = decompiler.DecompileFunction(peFile.OptionalHeader.AddressOfEntryPoint); - // Generate pseudocode - var pseudocode = decompiler.GeneratePseudocode(function); - Console.WriteLine("\nGenerated Pseudocode:\n"); - Console.WriteLine(pseudocode); - } - catch (Exception ex) - { - Console.WriteLine($"Error decompiling function: {ex.Message}"); - } - - // Skip displaying detailed loop information to keep output concise + var disassembler = new BlockDisassembler(codeBytes, section.VirtualAddress); + + var asmFunction = disassembler.DisassembleFromAddress(peFile.OptionalHeader.AddressOfEntryPoint); + Console.WriteLine(asmFunction); } - + // Console.WriteLine("\nPress Enter to exit..."); // Console.ReadLine(); } @@ -136,7 +107,7 @@ public class Program foreach (var import in peFile.ImportDescriptors) { Console.WriteLine($" DLL: {import.DllName}"); - + for (int i = 0; i < import.Functions.Count; i++) { var function = import.Functions[i]; @@ -150,6 +121,7 @@ public class Program } } } + Console.WriteLine(); } @@ -159,12 +131,13 @@ public class Program Console.WriteLine($"DLL Name: {peFile.ExportDirectory.DllName}"); Console.WriteLine($"Number of Functions: {peFile.ExportDirectory.NumberOfFunctions}"); Console.WriteLine($"Number of Names: {peFile.ExportDirectory.NumberOfNames}"); - + for (int i = 0; i < peFile.ExportedFunctions.Count; i++) { var export = peFile.ExportedFunctions[i]; Console.WriteLine($" {i}: {export.Name} (Ordinal={export.Ordinal}, RVA=0x{export.AddressRva:X8})"); } + Console.WriteLine(); } @@ -178,9 +151,10 @@ public class Program if (section.IsExecutable()) flags += "Exec "; if (section.IsReadable()) flags += "Read "; if (section.IsWritable()) flags += "Write"; - + Console.WriteLine($" {peFile.SectionHeaders.IndexOf(section)}: {section.Name,-8} VA=0x{section.VirtualAddress:X8} Size={section.VirtualSize,-8} [{flags}]"); } + Console.WriteLine(); } } \ No newline at end of file diff --git a/X86Disassembler/ProjectSystem/ProjectPeFile.cs b/X86Disassembler/ProjectSystem/ProjectPeFile.cs new file mode 100644 index 0000000..4551a5a --- /dev/null +++ b/X86Disassembler/ProjectSystem/ProjectPeFile.cs @@ -0,0 +1,35 @@ +using X86Disassembler.Analysers; + +namespace X86Disassembler.ProjectSystem; + +public class ProjectPeFile +{ + public string Name { get; set; } + + public string Architecture { get; set; } + + public Address EntryPointAddress { get; set; } + + public Address ImageBase { get; set; } +} + +public class ProjectPeFileSection +{ + public string Name { get; set; } + + public Address VirtualAddress { get; set; } + + public ulong Size { get; set; } + + public SectionFlags Flags { get; set; } +} + +[Flags] +public enum SectionFlags +{ + None = 0, + Code = 1, + Exec = 2, + Read = 4, + Write = 8 +} \ No newline at end of file