using X86Disassembler.Analysers.DecompilerTypes;
using X86Disassembler.X86;
using X86Disassembler.X86.Operands;
namespace X86Disassembler.Analysers;
///
/// Analyzes disassembled code to identify and track variables
///
public class VariableAnalyzer
{
///
/// The analyzer context
///
private readonly AnalyzerContext _context;
///
/// Creates a new variable analyzer
///
/// The analyzer context
public VariableAnalyzer(AnalyzerContext context)
{
_context = context;
}
///
/// Analyzes the function to identify stack variables
///
/// The function to analyze
public void AnalyzeStackVariables(Function function)
{
// Dictionary to track stack offsets and their corresponding variables
var stackOffsets = new Dictionary();
// First, identify the function prologue to determine stack frame setup
bool hasPushEbp = false;
bool hasMovEbpEsp = false;
int localSize = 0;
// Look for the function prologue pattern: push ebp; mov ebp, esp; sub esp, X
foreach (var block in function.AsmFunction.Blocks)
{
foreach (var instruction in block.Instructions)
{
// Look for push ebp
if (instruction.Type == InstructionType.Push &&
instruction.StructuredOperands.Count > 0 &&
instruction.StructuredOperands[0] is RegisterOperand regOp &&
regOp.Register == RegisterIndex.Bp)
{
hasPushEbp = true;
continue;
}
// Look for mov ebp, esp
if (instruction.Type == InstructionType.Mov &&
instruction.StructuredOperands.Count > 1 &&
instruction.StructuredOperands[0] is RegisterOperand destReg &&
instruction.StructuredOperands[1] is RegisterOperand srcReg &&
destReg.Register == RegisterIndex.Bp &&
srcReg.Register == RegisterIndex.Sp)
{
hasMovEbpEsp = true;
continue;
}
// Look for sub esp, X to determine local variable space
if (instruction.Type == InstructionType.Sub &&
instruction.StructuredOperands.Count > 1 &&
instruction.StructuredOperands[0] is RegisterOperand subReg &&
instruction.StructuredOperands[1] is ImmediateOperand immOp &&
subReg.Register == RegisterIndex.Sp)
{
localSize = (int)immOp.Value;
break;
}
}
// If we found the complete prologue, no need to check more blocks
if (hasPushEbp && hasMovEbpEsp && localSize > 0)
{
break;
}
}
// If we didn't find a standard prologue, we can't reliably analyze stack variables
if (!hasPushEbp || !hasMovEbpEsp)
{
return;
}
// Now scan for memory accesses relative to EBP
foreach (var block in function.AsmFunction.Blocks)
{
foreach (var instruction in block.Instructions)
{
// Look for memory operands that reference [ebp+X] or [ebp-X]
foreach (var operand in instruction.StructuredOperands)
{
if (operand is DisplacementMemoryOperand memOp &&
memOp.BaseRegister == RegisterIndex.Bp)
{
// This is accessing memory relative to EBP
int offset = (int)memOp.Displacement;
// Determine if this is a parameter or local variable
if (offset > 0 && offset < 1000) // Positive offset = parameter (with reasonable limit)
{
// Parameters start at [ebp+8] (return address at [ebp+4], saved ebp at [ebp+0])
int paramIndex = (offset - 8) / 4; // Assuming 4-byte parameters
// Make sure we have enough parameters in the function
while (function.Parameters.Count <= paramIndex)
{
var param = new Variable($"param_{function.Parameters.Count + 1}", DataType.Unknown)
{
Storage = Variable.StorageType.Parameter,
StackOffset = 8 + (function.Parameters.Count * 4),
IsParameter = true,
ParameterIndex = function.Parameters.Count,
Size = 4 // Assume 4 bytes (32-bit)
};
function.Parameters.Add(param);
}
}
else if (offset < 0 && offset > -1000) // Negative offset = local variable (with reasonable limit)
{
// Check if we've already seen this offset
if (!stackOffsets.TryGetValue(offset, out var variable))
{
// Create a new local variable
variable = new Variable($"local_{Math.Abs(offset)}", DataType.Unknown)
{
Storage = Variable.StorageType.Stack,
StackOffset = offset,
Size = 4 // Assume 4 bytes (32-bit)
};
// Add to our tracking dictionaries
stackOffsets[offset] = variable;
function.LocalVariables.Add(variable);
}
// Track the usage of this variable
TrackVariableUsage(variable, instruction);
}
}
}
}
}
// Analyze register-based variables
AnalyzeRegisterVariables(function);
}
///
/// Analyzes register usage to identify variables stored in registers
///
/// The function to analyze
private void AnalyzeRegisterVariables(Function function)
{
// This is a more complex analysis that would track register values across blocks
// For now, we'll focus on identifying registers that hold consistent values
// Dictionary to track register variables
var registerVariables = new Dictionary();
// For each block, analyze register usage
foreach (var block in function.AsmFunction.Blocks)
{
// Check if we have register values for this block from data flow analysis
var registerValuesKey = "RegisterValues";
if (_context.GetAnalysisData>(block.Address, registerValuesKey) is Dictionary registerValues)
{
foreach (var kvp in registerValues)
{
var register = kvp.Key;
var valueInfo = kvp.Value;
// Skip special registers like ESP and EBP
if (register == RegisterIndex.Sp || register == RegisterIndex.Bp)
{
continue;
}
// If the register holds a constant value, it might be a variable
if (valueInfo.Type == DataFlowAnalyzer.ValueInfo.ValueType.Constant)
{
// Check if we already have a variable for this register
if (!registerVariables.TryGetValue(register, out var variable))
{
// Create a new register variable
variable = new Variable($"reg_{RegisterMapper.GetRegisterName(register, 32)}", DataType.Unknown)
{
Storage = Variable.StorageType.Register,
Register = register,
Size = 4 // Assume 4 bytes (32-bit)
};
// Add to our tracking dictionary
registerVariables[register] = variable;
function.RegisterVariables.Add(variable);
}
}
}
}
}
}
///
/// Tracks how a variable is used in an instruction
///
/// The variable to track
/// The instruction using the variable
private void TrackVariableUsage(Variable variable, Instruction instruction)
{
// For now, we'll just try to infer the variable type based on its usage
// If the variable is used in a comparison with 0, it might be a boolean
if (instruction.Type == InstructionType.Cmp || instruction.Type == InstructionType.Test)
{
if (instruction.StructuredOperands.Count > 1 &&
instruction.StructuredOperands[1] is ImmediateOperand immOp &&
immOp.Value == 0)
{
// This might be a boolean check
if (variable.Type == DataType.Unknown)
{
// Set to int for now as we don't have a bool type
variable.Type = DataType.Int;
}
}
}
// If the variable is used with string instructions, it might be a string
// Check for string operations - we don't have specific string instruction types yet
// Skip string detection for now as we don't have the specific instruction types
// We'll detect strings through other means later
// If the variable is used with floating-point instructions, it might be a float
// Check for floating-point operations
if (instruction.Type == InstructionType.Fld ||
instruction.Type == InstructionType.Fst ||
instruction.Type == InstructionType.Fstp)
{
if (variable.Type == DataType.Unknown)
{
variable.Type = DataType.Float;
}
}
}
}