namespace X86Disassembler.Decompiler;
using System.Collections.Generic;
using X86Disassembler.X86;
/// <summary>
/// Performs data flow analysis on x86 instructions
/// </summary>
public class DataFlowAnalysis
{
/// <summary>
/// Represents a variable in the decompiled code
/// </summary>
public class Variable
{
/// <summary>
/// Gets or sets the name of the variable. Defaults to the empty string.
/// </summary>
public string Name { get; set; } = string.Empty;
/// <summary>
/// Gets or sets the type of the variable (if known). Defaults to "int".
/// </summary>
public string Type { get; set; } = "int"; // Default to int
/// <summary>
/// Gets or sets the storage location (register, memory, etc.).
/// DataFlowAnalysis stores a register name, the text found inside a memory
/// operand's brackets, or the raw operand text for temporary variables.
/// </summary>
public string Location { get; set; } = string.Empty;
/// <summary>
/// Gets or sets whether this variable is a parameter. Defaults to false.
/// </summary>
public bool IsParameter { get; set; }
/// <summary>
/// Gets or sets whether this variable is a return value. Defaults to false;
/// DataFlowAnalysis sets it on the variable created for the eax register.
/// </summary>
public bool IsReturnValue { get; set; }
}
/// <summary>
/// Represents an operation in the decompiled code
/// </summary>
public class Operation
{
    /// <summary>
    /// Gets or sets the operation type (for example "add", "call" or "return").
    /// Defaults to the empty string.
    /// </summary>
    public string Type { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the destination variable, or null when none has been assigned
    /// </summary>
    public Variable? Destination { get; set; }

    /// <summary>
    /// Gets the source operands. The list itself is read-only as a property but
    /// mutable in content; each element is a <see cref="Variable"/>, an
    /// <see cref="int"/> constant, or raw operand text as a <see cref="string"/>.
    /// </summary>
    public List<object> Sources { get; } = [];

    /// <summary>
    /// Gets or sets the original instruction. Declared non-nullable, but holds
    /// null until it is explicitly assigned.
    /// </summary>
    public Instruction OriginalInstruction { get; set; } = null!;

    /// <summary>
    /// Gets or sets the address of the instruction this operation was created from
    /// </summary>
    public ulong InstructionAddress { get; set; }
}
// Map of lower-case register names to their variables
private readonly Dictionary<string, Variable> _registerVariables = [];
// Map of normalized memory locations (text inside the brackets) to their variables
private readonly Dictionary<string, Variable> _memoryVariables = [];
// Operations recorded so far, in the order they were analyzed
private readonly List<Operation> _operations = [];
// Counter shared by the generated "var_N" and "temp_N" variable names
private int _variableCounter;
/// <summary>
/// Gets a read-only view of the operations recorded so far, in the order they were added
/// </summary>
public IReadOnlyList<Operation> Operations => _operations;
/// <summary>
/// Gets the distinct variables currently held in the register map and the
/// memory-location map. A new set is built on every access. Temporary
/// variables are not stored in either map and are therefore not included.
/// </summary>
public IEnumerable<Variable> Variables
{
    get
    {
        // A set guards against the same Variable instance being reported twice.
        HashSet<Variable> uniqueVariables = new HashSet<Variable>(_registerVariables.Values);
        uniqueVariables.UnionWith(_memoryVariables.Values);
        return uniqueVariables;
    }
}
/// <summary>
/// Analyzes a list of instructions to identify variables and operations.
/// Each call re-creates the register variables and appends to the operations
/// already recorded; nothing is cleared between calls.
/// </summary>
/// <param name="instructions">The list of instructions to analyze</param>
/// <exception cref="ArgumentNullException"><paramref name="instructions"/> is null</exception>
public void Analyze(List<Instruction> instructions)
{
    // Fail at the boundary instead of with a NullReferenceException inside the loop.
    ArgumentNullException.ThrowIfNull(instructions);

    // Initialize common register variables
    InitializeRegisterVariables();

    // Process each instruction in order
    foreach (Instruction instruction in instructions)
    {
        AnalyzeInstruction(instruction);
    }
}
/// <summary>
/// Creates one variable per known 32-, 16- and 8-bit register name and stores
/// it in the register map under that name, replacing any existing entry.
/// The variable for "eax" is flagged as the return value.
/// </summary>
private void InitializeRegisterVariables()
{
    string[] registerNames =
    {
        // 32-bit general purpose registers
        "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp",
        // 16-bit registers
        "ax", "bx", "cx", "dx", "si", "di", "bp", "sp",
        // 8-bit registers
        "al", "ah", "bl", "bh", "cl", "ch", "dl", "dh"
    };

    // Name and location are both the register name itself.
    foreach (string registerName in registerNames)
    {
        _registerVariables[registerName] = new Variable
        {
            Name = registerName,
            Location = registerName
        };
    }

    // Mark EAX as the return value register
    _registerVariables["eax"].IsReturnValue = true;
}
/// <summary>
/// Records a placeholder operation for a single instruction. Only the
/// operation type, address and originating instruction are filled in;
/// destination and sources are not populated yet.
/// </summary>
/// <param name="instruction">The instruction to analyze</param>
private void AnalyzeInstruction(Instruction instruction)
{
    InstructionType type = instruction.Type;
    var structuredOperands = instruction.StructuredOperands;

    // Skip instructions without operands.
    // NOTE(review): this presumably also skips operand-less instructions such as
    // a bare RET, so no "return" operation is recorded for them - confirm intent.
    if (structuredOperands == null || structuredOperands.Count == 0)
    {
        return;
    }

    Operation operation = new Operation
    {
        // Keep the link back to the instruction; the property is declared
        // non-nullable, so leaving it unset would hand consumers a null.
        OriginalInstruction = instruction,
        InstructionAddress = instruction.Address,
        Type = GetOperationType(type)
    };

    // TODO: populate Destination/Sources from the structured operands.
    // Until then the operation is only a typed placeholder.
    _operations.Add(operation);
}
/// <summary>
/// Maps an instruction type to the operation type string stored on an operation.
/// Note that Ret maps to "return"; every type without an explicit entry falls
/// back to its ToString() text.
/// </summary>
/// <param name="type">The instruction type to translate</param>
/// <returns>The operation type string</returns>
private string GetOperationType(InstructionType type) => type switch
{
    // Arithmetic and bitwise
    InstructionType.Add => "add",
    InstructionType.Sub => "sub",
    InstructionType.Mul => "mul",
    InstructionType.Div => "div",
    InstructionType.And => "and",
    InstructionType.Or => "or",
    InstructionType.Xor => "xor",
    // Stack and calls
    InstructionType.Push => "push",
    InstructionType.Pop => "pop",
    InstructionType.Call => "call",
    InstructionType.Ret => "return",
    // Comparisons
    InstructionType.Cmp => "cmp",
    InstructionType.Test => "test",
    // Jumps
    InstructionType.Jmp => "jmp",
    InstructionType.Je => "je",
    InstructionType.Jne => "jne",
    InstructionType.Jg => "jg",
    InstructionType.Jge => "jge",
    InstructionType.Jl => "jl",
    InstructionType.Jle => "jle",
    _ => type.ToString(),
};
/// <summary>
/// Populates an operation for a MOV instruction: type "assignment", the first
/// operand as destination and the second operand as the single source.
/// Does nothing unless exactly two operand parts are supplied.
/// </summary>
/// <param name="operation">The operation to populate</param>
/// <param name="operandParts">The operand parts (destination, source)</param>
private void HandleMovInstruction(Operation operation, string[] operandParts)
{
    if (operandParts.Length == 2)
    {
        operation.Type = "assignment";

        // Resolve the destination before the source so generated variable
        // numbers are handed out in operand order.
        operation.Destination = GetOrCreateVariable(operandParts[0]);
        operation.Sources.Add(GetOperandValue(operandParts[1]));
    }
}
/// <summary>
/// Populates an operation for a two-operand arithmetic instruction
/// (ADD, SUB, MUL, DIV, AND, OR, XOR). The first operand becomes the
/// destination; the sources are recorded as [second operand, destination].
/// Does nothing unless exactly two operand parts are supplied.
/// </summary>
/// <param name="operation">The operation to populate</param>
/// <param name="mnemonic">The instruction mnemonic, stored as the operation type</param>
/// <param name="operandParts">The operand parts (destination, source)</param>
private void HandleArithmeticInstruction(Operation operation, string mnemonic, string[] operandParts)
{
    if (operandParts.Length == 2)
    {
        operation.Type = mnemonic;

        Variable target = GetOrCreateVariable(operandParts[0]);
        operation.Destination = target;

        // The explicit source goes first; the destination is read as well as
        // written, so it is appended as a second source.
        operation.Sources.Add(GetOperandValue(operandParts[1]));
        operation.Sources.Add(target);
    }
}
/// <summary>
/// Populates an operation for a stack instruction. For "push" the operand is
/// recorded as a source; for "pop" it becomes the destination. Any other
/// mnemonic only sets the operation type. Does nothing unless exactly one
/// operand part is supplied.
/// </summary>
/// <param name="operation">The operation to populate</param>
/// <param name="mnemonic">The instruction mnemonic ("push" or "pop", case-sensitive)</param>
/// <param name="operandParts">The operand parts (a single operand)</param>
private void HandleStackInstruction(Operation operation, string mnemonic, string[] operandParts)
{
    if (operandParts.Length != 1)
    {
        return;
    }

    operation.Type = mnemonic;

    switch (mnemonic)
    {
        case "push":
            // PUSH reads its operand
            operation.Sources.Add(GetOperandValue(operandParts[0]));
            break;

        case "pop":
            // POP writes its operand
            operation.Destination = GetOrCreateVariable(operandParts[0]);
            break;
    }
}
/// <summary>
/// Populates an operation for a CALL instruction: type "call" with the raw
/// operand text (function name or address) as the only source.
/// Does nothing unless exactly one operand part is supplied.
/// </summary>
/// <param name="operation">The operation to populate</param>
/// <param name="operandParts">The operand parts (a single call target)</param>
private void HandleCallInstruction(Operation operation, string[] operandParts)
{
    if (operandParts.Length == 1)
    {
        operation.Type = "call";

        // The target is kept as text; it is not resolved to a variable.
        string callTarget = operandParts[0];
        operation.Sources.Add(callTarget);
    }
}
/// <summary>
/// Populates an operation for a RET instruction: type "return", with the
/// eax register variable added as a source when one is registered.
/// </summary>
/// <param name="operation">The operation to populate</param>
private void HandleReturnInstruction(Operation operation)
{
    operation.Type = "return";

    // Without a registered eax variable there is no return value to attach.
    if (!_registerVariables.TryGetValue("eax", out Variable? returnRegister))
    {
        return;
    }

    operation.Sources.Add(returnRegister);
}
/// <summary>
/// Populates an operation for a comparison instruction (CMP, TEST). Both
/// operands are recorded as sources in operand order; no destination is set.
/// Does nothing unless exactly two operand parts are supplied.
/// </summary>
/// <param name="operation">The operation to populate</param>
/// <param name="mnemonic">The instruction mnemonic, stored as the operation type</param>
/// <param name="operandParts">The operand parts (left, right)</param>
private void HandleComparisonInstruction(Operation operation, string mnemonic, string[] operandParts)
{
    if (operandParts.Length == 2)
    {
        operation.Type = mnemonic;

        // Resolve both operands before touching the source list.
        object firstOperand = GetOperandValue(operandParts[0]);
        object secondOperand = GetOperandValue(operandParts[1]);

        operation.Sources.Add(firstOperand);
        operation.Sources.Add(secondOperand);
    }
}
/// <summary>
/// Populates an operation for a jump instruction (JMP, JE, JNE, etc.): the
/// mnemonic becomes the operation type and the raw operand text (the jump
/// target) the only source. Does nothing unless exactly one operand part is
/// supplied.
/// </summary>
/// <param name="operation">The operation to populate</param>
/// <param name="mnemonic">The instruction mnemonic, stored as the operation type</param>
/// <param name="operandParts">The operand parts (a single jump target)</param>
private void HandleJumpInstruction(Operation operation, string mnemonic, string[] operandParts)
{
    if (operandParts.Length == 1)
    {
        operation.Type = mnemonic;

        // The target is kept as text; it is not resolved to a variable.
        string jumpTarget = operandParts[0];
        operation.Sources.Add(jumpTarget);
    }
}
/// <summary>
/// Gets or creates a variable for an operand.
/// Registers resolve to their pre-registered variable; memory operands resolve
/// to one shared variable per normalized location (created as "var_N" on first
/// use); anything else yields a fresh, untracked "temp_N" variable on every call.
/// </summary>
/// <param name="operand">The operand string</param>
/// <returns>The variable</returns>
private Variable GetOrCreateVariable(string operand)
{
    // The register map is keyed by lower-case register name and contains nothing
    // else, so a direct lookup doubles as the "is it a register" test. The key is
    // lowered with the invariant culture: a culture-sensitive ToLower() turns
    // "EDI" into "edı" under Turkish cultures and the lookup would miss.
    if (_registerVariables.TryGetValue(operand.ToLowerInvariant(), out Variable? registerVariable))
    {
        return registerVariable;
    }

    // Check if it's a memory location
    if (IsMemoryLocation(operand))
    {
        string normalizedLocation = NormalizeMemoryLocation(operand);
        if (!_memoryVariables.TryGetValue(normalizedLocation, out Variable? memoryVariable))
        {
            // First reference to this location: create and remember its variable
            memoryVariable = new Variable
            {
                Name = $"var_{_variableCounter++}",
                Location = normalizedLocation
            };
            _memoryVariables[normalizedLocation] = memoryVariable;
        }

        return memoryVariable;
    }

    // Neither a register nor a memory location: hand out a temporary variable.
    // It is deliberately not cached, so equal operands yield distinct temporaries.
    return new Variable
    {
        Name = $"temp_{_variableCounter++}",
        Location = operand
    };
}
/// <summary>
/// Gets the value of an operand (variable or constant).
/// Registers and memory operands resolve to a variable; "0x"/"0X"-prefixed hex
/// and plain decimal integers that fit in 32 bits resolve to a boxed
/// <see cref="int"/>; everything else is returned unchanged as text.
/// </summary>
/// <param name="operand">The operand string</param>
/// <returns>The operand value (Variable, int constant, or the original string)</returns>
private object GetOperandValue(string operand)
{
    // Check if it's a register or memory location
    if (IsRegister(operand) || IsMemoryLocation(operand))
    {
        return GetOrCreateVariable(operand);
    }

    // Operand text is machine-generated, so parse it culture-independently;
    // the current culture's sign symbols must not influence the result.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Hexadecimal constant: ordinal prefix match, either case of the 'x'
    if (operand.Length > 2
        && operand.StartsWith("0x", StringComparison.OrdinalIgnoreCase)
        && int.TryParse(operand[2..], System.Globalization.NumberStyles.HexNumber, invariant, out int hexValue))
    {
        return hexValue;
    }

    // Decimal constant
    if (int.TryParse(operand, System.Globalization.NumberStyles.Integer, invariant, out int decimalValue))
    {
        return decimalValue;
    }

    // Otherwise, return the operand as a string
    return operand;
}
// Register names recognized by IsRegister. The comparer makes the match
// case-insensitive and culture-independent, and the set is built only once.
private static readonly HashSet<string> KnownRegisterNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
{
    "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp",
    "ax", "bx", "cx", "dx", "si", "di", "bp", "sp",
    "al", "ah", "bl", "bh", "cl", "ch", "dl", "dh"
};

/// <summary>
/// Checks if an operand is one of the known 32-, 16- or 8-bit register names.
/// The comparison ignores case and does not depend on the current culture
/// (a culture-sensitive ToLower() would reject "EDI" under Turkish cultures).
/// </summary>
/// <param name="operand">The operand to check</param>
/// <returns>True if the operand is a register; false otherwise, including for null</returns>
private bool IsRegister(string operand)
{
    return KnownRegisterNames.Contains(operand);
}
/// <summary>
/// Checks if an operand is a memory location, i.e. contains an opening
/// bracket that is followed somewhere later by a closing bracket.
/// Brackets in the wrong order (such as "]eax[") do not count.
/// </summary>
/// <param name="operand">The operand to check</param>
/// <returns>True if the operand is a memory location</returns>
private bool IsMemoryLocation(string operand)
{
    int openBracket = operand.IndexOf('[');

    // Only look for ']' after the '[' so that bracket order is enforced.
    return openBracket >= 0 && operand.IndexOf(']', openBracket + 1) >= 0;
}
/// <summary>
/// Normalizes a memory location operand by returning the trimmed text between
/// the first '[' and the first ']'. When there is no '[' or the first ']' does
/// not come after it, the operand is returned unchanged.
/// </summary>
/// <param name="operand">The operand to normalize</param>
/// <returns>The normalized memory location</returns>
private string NormalizeMemoryLocation(string operand)
{
    int open = operand.IndexOf('[');
    int close = operand.IndexOf(']');

    bool hasBracketPair = open >= 0 && close > open;

    // Slice out the bracket contents, dropping the brackets themselves.
    return hasBracketPair
        ? operand[(open + 1)..close].Trim()
        : operand;
}
}