namespace X86Disassembler.Decompiler;

using System.Collections.Generic;
using X86;

/// <summary>
/// Performs data flow analysis on x86 instructions.
/// </summary>
/// <remarks>
/// Each call to <see cref="Analyze"/> re-creates the register variables, while recorded
/// operations and memory variables accumulate across calls on the same instance.
/// </remarks>
public class DataFlowAnalysis
{
    /// <summary>
    /// Represents a variable in the decompiled code.
    /// </summary>
    public class Variable
    {
        /// <summary>
        /// Gets or sets the name of the variable.
        /// </summary>
        public string Name { get; set; } = string.Empty;

        /// <summary>
        /// Gets or sets the type of the variable (if known). Defaults to <c>"int"</c>.
        /// </summary>
        public string Type { get; set; } = "int";

        /// <summary>
        /// Gets or sets the storage location (register, memory, etc.).
        /// </summary>
        public string Location { get; set; } = string.Empty;

        /// <summary>
        /// Gets or sets whether this variable is a parameter.
        /// </summary>
        public bool IsParameter { get; set; }

        /// <summary>
        /// Gets or sets whether this variable is a return value.
        /// </summary>
        public bool IsReturnValue { get; set; }
    }

    /// <summary>
    /// Represents an operation in the decompiled code.
    /// </summary>
    public class Operation
    {
        /// <summary>
        /// Gets or sets the operation type.
        /// </summary>
        public string Type { get; set; } = string.Empty;

        /// <summary>
        /// Gets or sets the destination variable, if the operation has one.
        /// </summary>
        public Variable? Destination { get; set; }

        /// <summary>
        /// Gets the operation sources. Each entry is either a <see cref="Variable"/>,
        /// an <see cref="int"/> constant, or the raw operand text as a <see cref="string"/>.
        /// </summary>
        public List<object> Sources { get; } = [];

        /// <summary>
        /// Gets or sets the original instruction.
        /// </summary>
        public Instruction OriginalInstruction { get; set; } = null!;

        /// <summary>
        /// Gets or sets the address of the original instruction.
        /// </summary>
        public ulong InstructionAddress { get; set; }
    }

    // Register names in the order their variables are created:
    // 32-bit general purpose, then 16-bit, then 8-bit registers.
    private static readonly string[] s_registerNames =
    [
        "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp",
        "ax", "bx", "cx", "dx", "si", "di", "bp", "sp",
        "al", "ah", "bl", "bh", "cl", "ch", "dl", "dh",
    ];

    // Case-insensitive membership set for IsRegister. Ordinal comparison keeps the
    // result independent of the current culture (e.g. the Turkish dotless i).
    private static readonly HashSet<string> s_registerSet =
        new(s_registerNames, System.StringComparer.OrdinalIgnoreCase);

    // Map of register names to variables (keys compare case-insensitively)
    private readonly Dictionary<string, Variable> _registerVariables =
        new(System.StringComparer.OrdinalIgnoreCase);

    // Map of normalized memory locations to variables
    private readonly Dictionary<string, Variable> _memoryVariables = [];

    // List of operations, in the order the instructions were analyzed
    private readonly List<Operation> _operations = [];

    // Counter for generating variable names (shared by var_N and temp_N)
    private int _variableCounter;

    /// <summary>
    /// Gets the list of operations.
    /// </summary>
    public IReadOnlyList<Operation> Operations => _operations;

    /// <summary>
    /// Gets the distinct register and memory variables currently tracked.
    /// Temporary variables created for other operands are not stored and
    /// therefore do not appear here.
    /// </summary>
    public IEnumerable<Variable> Variables
    {
        get
        {
            HashSet<Variable> uniqueVariables = new(_registerVariables.Values);
            uniqueVariables.UnionWith(_memoryVariables.Values);
            return uniqueVariables;
        }
    }

    /// <summary>
    /// Analyzes a list of instructions to identify variables and operations.
    /// </summary>
    /// <param name="instructions">The list of instructions to analyze</param>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="instructions"/> is <see langword="null"/>.
    /// </exception>
    public void Analyze(List<Instruction> instructions)
    {
        System.ArgumentNullException.ThrowIfNull(instructions);

        // Initialize common register variables
        InitializeRegisterVariables();

        // Process each instruction
        foreach (Instruction instruction in instructions)
        {
            AnalyzeInstruction(instruction);
        }
    }

    /// <summary>
    /// Initializes common register variables. Any existing entry for a register
    /// is replaced by a fresh <see cref="Variable"/>.
    /// </summary>
    private void InitializeRegisterVariables()
    {
        foreach (string register in s_registerNames)
        {
            _registerVariables[register] = new Variable { Name = register, Location = register };
        }

        // Mark EAX as the return value register
        _registerVariables["eax"].IsReturnValue = true;
    }

    /// <summary>
    /// Analyzes a single instruction and records a placeholder operation for it.
    /// Instructions without structured operands are skipped.
    /// </summary>
    /// <param name="instruction">The instruction to analyze</param>
    private void AnalyzeInstruction(Instruction instruction)
    {
        var structuredOperands = instruction.StructuredOperands;

        // Skip instructions without operands
        if (structuredOperands == null || structuredOperands.Count == 0)
        {
            return;
        }

        // Operand processing still needs to be ported to structured operands;
        // for now only the type, address and originating instruction are recorded.
        Operation operation = new()
        {
            InstructionAddress = instruction.Address,
            OriginalInstruction = instruction,
            Type = GetOperationType(instruction.Type),
        };

        _operations.Add(operation);
    }

    /// <summary>
    /// Maps an instruction type to the operation type string used in <see cref="Operation.Type"/>.
    /// </summary>
    /// <param name="type">The instruction type</param>
    /// <returns>
    /// The lower-case operation name for known types (<c>Ret</c> maps to <c>"return"</c>);
    /// otherwise the result of <c>type.ToString()</c>.
    /// </returns>
    private static string GetOperationType(InstructionType type) => type switch
    {
        InstructionType.Add => "add",
        InstructionType.Sub => "sub",
        InstructionType.Mul => "mul",
        InstructionType.Div => "div",
        InstructionType.And => "and",
        InstructionType.Or => "or",
        InstructionType.Xor => "xor",
        InstructionType.Push => "push",
        InstructionType.Pop => "pop",
        InstructionType.Call => "call",
        InstructionType.Ret => "return",
        InstructionType.Cmp => "cmp",
        InstructionType.Test => "test",
        InstructionType.Jmp => "jmp",
        InstructionType.Je => "je",
        InstructionType.Jne => "jne",
        InstructionType.Jg => "jg",
        InstructionType.Jge => "jge",
        InstructionType.Jl => "jl",
        InstructionType.Jle => "jle",
        _ => type.ToString(),
    };

    /// <summary>
    /// Handles a MOV instruction. Does nothing unless exactly two operands are given.
    /// </summary>
    /// <param name="operation">The operation to populate</param>
    /// <param name="operandParts">The operand parts (destination, source)</param>
    private void HandleMovInstruction(Operation operation, string[] operandParts)
    {
        if (operandParts.Length != 2)
        {
            return;
        }

        operation.Type = "assignment";

        // Get or create the destination variable
        operation.Destination = GetOrCreateVariable(operandParts[0]);

        // Get the source (variable or constant)
        operation.Sources.Add(GetOperandValue(operandParts[1]));
    }

    /// <summary>
    /// Handles an arithmetic instruction (ADD, SUB, MUL, DIV, AND, OR, XOR).
    /// Does nothing unless exactly two operands are given.
    /// </summary>
    /// <param name="operation">The operation to populate</param>
    /// <param name="mnemonic">The instruction mnemonic</param>
    /// <param name="operandParts">The operand parts (destination, source)</param>
    private void HandleArithmeticInstruction(Operation operation, string mnemonic, string[] operandParts)
    {
        if (operandParts.Length != 2)
        {
            return;
        }

        operation.Type = mnemonic;

        // Get or create the destination variable
        Variable destination = GetOrCreateVariable(operandParts[0]);
        operation.Destination = destination;

        // Get the source (variable or constant)
        operation.Sources.Add(GetOperandValue(operandParts[1]));

        // The destination is also a source in arithmetic operations
        operation.Sources.Add(destination);
    }

    /// <summary>
    /// Handles a stack instruction (PUSH, POP). Does nothing unless exactly one operand is given.
    /// </summary>
    /// <param name="operation">The operation to populate</param>
    /// <param name="mnemonic">The instruction mnemonic (<c>"push"</c> or <c>"pop"</c>, compared case-sensitively)</param>
    /// <param name="operandParts">The operand parts</param>
    private void HandleStackInstruction(Operation operation, string mnemonic, string[] operandParts)
    {
        if (operandParts.Length != 1)
        {
            return;
        }

        operation.Type = mnemonic;

        if (mnemonic == "push")
        {
            // For PUSH, the operand is the source
            operation.Sources.Add(GetOperandValue(operandParts[0]));
        }
        else if (mnemonic == "pop")
        {
            // For POP, the operand is the destination
            operation.Destination = GetOrCreateVariable(operandParts[0]);
        }
    }

    /// <summary>
    /// Handles a CALL instruction. Does nothing unless exactly one operand is given.
    /// </summary>
    /// <param name="operation">The operation to populate</param>
    /// <param name="operandParts">The operand parts</param>
    private void HandleCallInstruction(Operation operation, string[] operandParts)
    {
        if (operandParts.Length != 1)
        {
            return;
        }

        operation.Type = "call";

        // The operand is the function name or address, stored as raw text
        operation.Sources.Add(operandParts[0]);
    }

    /// <summary>
    /// Handles a RET instruction.
    /// </summary>
    /// <param name="operation">The operation to populate</param>
    private void HandleReturnInstruction(Operation operation)
    {
        operation.Type = "return";

        // The return value is in EAX (present only once register variables are initialized)
        if (_registerVariables.TryGetValue("eax", out Variable? eax))
        {
            operation.Sources.Add(eax);
        }
    }

    /// <summary>
    /// Handles a comparison instruction (CMP, TEST). Does nothing unless exactly two operands are given.
    /// </summary>
    /// <param name="operation">The operation to populate</param>
    /// <param name="mnemonic">The instruction mnemonic</param>
    /// <param name="operandParts">The operand parts (left, right)</param>
    private void HandleComparisonInstruction(Operation operation, string mnemonic, string[] operandParts)
    {
        if (operandParts.Length != 2)
        {
            return;
        }

        operation.Type = mnemonic;

        // Resolve both operands before recording either of them
        object left = GetOperandValue(operandParts[0]);
        object right = GetOperandValue(operandParts[1]);

        operation.Sources.Add(left);
        operation.Sources.Add(right);
    }

    /// <summary>
    /// Handles a jump instruction (JMP, JE, JNE, etc.). Does nothing unless exactly one operand is given.
    /// </summary>
    /// <param name="operation">The operation to populate</param>
    /// <param name="mnemonic">The instruction mnemonic</param>
    /// <param name="operandParts">The operand parts</param>
    private void HandleJumpInstruction(Operation operation, string mnemonic, string[] operandParts)
    {
        if (operandParts.Length != 1)
        {
            return;
        }

        operation.Type = mnemonic;

        // The operand is the jump target, stored as raw text
        operation.Sources.Add(operandParts[0]);
    }

    /// <summary>
    /// Gets or creates a variable for an operand.
    /// </summary>
    /// <param name="operand">The operand string</param>
    /// <returns>
    /// The existing register variable, the existing or newly created memory variable,
    /// or a new untracked temporary variable when the operand is neither.
    /// </returns>
    private Variable GetOrCreateVariable(string operand)
    {
        // The register map only ever holds register names and compares them
        // case-insensitively, so a hit means the operand is a known register.
        if (_registerVariables.TryGetValue(operand, out Variable? registerVariable))
        {
            return registerVariable;
        }

        // Check if it's a memory location
        if (IsMemoryLocation(operand))
        {
            string normalizedLocation = NormalizeMemoryLocation(operand);

            if (_memoryVariables.TryGetValue(normalizedLocation, out Variable? memoryVariable))
            {
                return memoryVariable;
            }

            // Create a new variable for this memory location
            memoryVariable = new Variable
            {
                Name = $"var_{_variableCounter++}",
                Location = normalizedLocation,
            };
            _memoryVariables[normalizedLocation] = memoryVariable;
            return memoryVariable;
        }

        // Neither a known register nor a memory location: create a temporary variable
        return new Variable
        {
            Name = $"temp_{_variableCounter++}",
            Location = operand,
        };
    }

    /// <summary>
    /// Gets the value of an operand (variable or constant).
    /// </summary>
    /// <param name="operand">The operand string</param>
    /// <returns>
    /// A <see cref="Variable"/> for registers and memory locations, an <see cref="int"/>
    /// for <c>0x</c>-prefixed hexadecimal or decimal constants, otherwise the operand text itself.
    /// </returns>
    private object GetOperandValue(string operand)
    {
        // Check if it's a register or memory location
        if (IsRegister(operand) || IsMemoryLocation(operand))
        {
            return GetOrCreateVariable(operand);
        }

        // Check if it's a hexadecimal constant; parsing is culture-invariant so the
        // result does not depend on the machine's regional settings.
        if (operand.Length > 2
            && operand.StartsWith("0x", System.StringComparison.Ordinal)
            && int.TryParse(
                operand[2..],
                System.Globalization.NumberStyles.HexNumber,
                System.Globalization.CultureInfo.InvariantCulture,
                out int hexValue))
        {
            return hexValue;
        }

        // Check if it's a decimal constant
        if (int.TryParse(
                operand,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out int decimalValue))
        {
            return decimalValue;
        }

        // Otherwise, return the operand as a string
        return operand;
    }

    /// <summary>
    /// Checks if an operand is a register.
    /// </summary>
    /// <param name="operand">The operand to check</param>
    /// <returns>True if the operand is a known register name, ignoring case</returns>
    private static bool IsRegister(string operand) => s_registerSet.Contains(operand);

    /// <summary>
    /// Checks if an operand is a memory location.
    /// </summary>
    /// <param name="operand">The operand to check</param>
    /// <returns>True if the operand contains both <c>[</c> and <c>]</c></returns>
    private static bool IsMemoryLocation(string operand) =>
        operand.Contains('[') && operand.Contains(']');

    /// <summary>
    /// Normalizes a memory location operand.
    /// </summary>
    /// <param name="operand">The operand to normalize</param>
    /// <returns>
    /// The trimmed text between the first <c>[</c> and the first <c>]</c>, or the operand
    /// unchanged when that <c>]</c> does not come after the <c>[</c>.
    /// </returns>
    private static string NormalizeMemoryLocation(string operand)
    {
        int startIndex = operand.IndexOf('[');
        int endIndex = operand.IndexOf(']');

        if (startIndex >= 0 && endIndex > startIndex)
        {
            // Extract the part inside the brackets
            return operand[(startIndex + 1)..endIndex].Trim();
        }

        return operand;
    }
}