0
mirror of https://github.com/sampletext32/ParkanPlayground.git synced 2025-07-01 12:40:25 +03:00

Enhance control flow analysis and pseudocode generation

This commit is contained in:
bird_egop
2025-04-18 21:52:48 +03:00
parent 883f3a2659
commit 0ddbfd2951
2 changed files with 307 additions and 91 deletions

View File

@ -42,7 +42,7 @@ public class ControlFlowAnalyzer
/// <param name="function">The function to analyze</param> /// <param name="function">The function to analyze</param>
private void IdentifyIfElseStructures(Function function) private void IdentifyIfElseStructures(Function function)
{ {
// For each block in the function // First pass: identify basic if-else structures
foreach (var block in function.AsmFunction.Blocks) foreach (var block in function.AsmFunction.Blocks)
{ {
// Skip blocks that don't end with a conditional jump // Skip blocks that don't end with a conditional jump
@ -71,15 +71,83 @@ public class ControlFlowAnalyzer
if (fallthroughBlock != null) if (fallthroughBlock != null)
{ {
// Store the if-else structure in the context // Check if the fallthrough block ends with an unconditional jump
var ifElseStructure = new IfElseStructure // This could indicate an if-else structure where the 'else' branch jumps to a common merge point
InstructionBlock? mergeBlock = null;
bool hasElseBlock = true;
if (fallthroughBlock.Instructions.Count > 0 &&
fallthroughBlock.Instructions[^1].Type == InstructionType.Jmp)
{
// Get the jump target address
ulong mergeAddress = GetJumpTargetAddress(fallthroughBlock.Instructions[^1]);
// Find the merge block
if (_context.BlocksByAddress.TryGetValue(mergeAddress, out var potentialMergeBlock))
{
mergeBlock = potentialMergeBlock;
}
}
// Check if the 'then' block also jumps to the same merge point
if (mergeBlock != null && targetBlock.Instructions.Count > 0 &&
targetBlock.Instructions[^1].Type == InstructionType.Jmp)
{
ulong thenJumpAddress = GetJumpTargetAddress(targetBlock.Instructions[^1]);
if (thenJumpAddress == mergeBlock.Address)
{
// We have a classic if-else structure with a merge point
// Store the if-else structure in the context
var ifElseStructure = new IfElseStructure
{
ConditionBlock = block,
ThenBlock = targetBlock,
ElseBlock = fallthroughBlock,
MergeBlock = mergeBlock,
IsComplete = true // Both branches merge back
};
_context.StoreAnalysisData(block.Address, "IfElseStructure", ifElseStructure);
continue;
}
}
// If we get here, we have a simple if-then or if-then-else without a clear merge point
var simpleIfStructure = new IfElseStructure
{ {
ConditionBlock = block, ConditionBlock = block,
ThenBlock = targetBlock, ThenBlock = targetBlock,
ElseBlock = fallthroughBlock ElseBlock = hasElseBlock ? fallthroughBlock : null,
IsComplete = false // No clear merge point
}; };
_context.StoreAnalysisData(block.Address, "IfElseStructure", ifElseStructure); _context.StoreAnalysisData(block.Address, "IfElseStructure", simpleIfStructure);
}
}
}
}
// Second pass: identify nested if-else structures
foreach (var block in function.AsmFunction.Blocks)
{
var ifElseStructure = _context.GetAnalysisData<IfElseStructure>(block.Address, "IfElseStructure");
if (ifElseStructure != null)
{
// Check if the 'then' block contains another if-else structure
var nestedThenIf = _context.GetAnalysisData<IfElseStructure>(ifElseStructure.ThenBlock.Address, "IfElseStructure");
if (nestedThenIf != null)
{
ifElseStructure.NestedThenStructure = nestedThenIf;
}
// Check if the 'else' block contains another if-else structure
if (ifElseStructure.ElseBlock != null)
{
var nestedElseIf = _context.GetAnalysisData<IfElseStructure>(ifElseStructure.ElseBlock.Address, "IfElseStructure");
if (nestedElseIf != null)
{
ifElseStructure.NestedElseStructure = nestedElseIf;
} }
} }
} }
@ -233,14 +301,34 @@ public class ControlFlowAnalyzer
public InstructionBlock ConditionBlock { get; set; } = null!; public InstructionBlock ConditionBlock { get; set; } = null!;
/// <summary> /// <summary>
/// The block containing the 'then' branch /// The block representing the 'then' branch (taken when condition is true)
/// </summary> /// </summary>
public InstructionBlock ThenBlock { get; set; } = null!; public InstructionBlock ThenBlock { get; set; } = null!;
/// <summary> /// <summary>
/// The block containing the 'else' branch (may be null for if-then structures) /// The block representing the 'else' branch (taken when condition is false)
/// </summary> /// </summary>
public InstructionBlock ElseBlock { get; set; } = null!; public InstructionBlock? ElseBlock { get; set; }
/// <summary>
/// The block where both branches merge back together (if applicable)
/// </summary>
public InstructionBlock? MergeBlock { get; set; }
/// <summary>
/// Whether this is a complete if-else structure with a merge point
/// </summary>
public bool IsComplete { get; set; }
/// <summary>
/// Nested if-else structure in the 'then' branch (if any)
/// </summary>
public IfElseStructure? NestedThenStructure { get; set; }
/// <summary>
/// Nested if-else structure in the 'else' branch (if any)
/// </summary>
public IfElseStructure? NestedElseStructure { get; set; }
} }
/// <summary> /// <summary>

View File

@ -273,32 +273,53 @@ public class PseudocodeGenerator
// Add the if statement // Add the if statement
string indent = new string(' ', indentLevel * 4); string indent = new string(' ', indentLevel * 4);
result.AppendLine($"{indent}// If-else structure at 0x{ifElseStructure.ConditionBlock.Address:X8}") result.AppendLine($"{indent}if ({condition})");
.AppendLine($"{indent}if ({condition})"); result.AppendLine($"{indent}{{");
// Add the then branch // Check if the 'then' branch contains a nested if-else structure
result.AppendLine($"{indent}{{") if (ifElseStructure.NestedThenStructure != null)
.AppendLine($"{indent} // Then branch at 0x{ifElseStructure.ThenBlock.Address:X8}"); {
// Generate code for the nested if-else structure in the 'then' branch
GenerateIfElseCode(function, ifElseStructure.NestedThenStructure, result, indentLevel + 1, processedBlocks);
}
else
{
// Generate code for the 'then' branch normally
GenerateBlockCode(function, ifElseStructure.ThenBlock, result, indentLevel + 1, processedBlocks);
}
// Generate code for the then branch // Close the 'then' branch
GenerateBlockCode(function, ifElseStructure.ThenBlock, result, indentLevel + 1, processedBlocks);
// Close the then branch
result.AppendLine($"{indent}}}"); result.AppendLine($"{indent}}}");
// Add the else branch if it exists and is not already processed // Add the 'else' branch if it exists and is not already processed
if (ifElseStructure.ElseBlock != null && !processedBlocks.Contains(ifElseStructure.ElseBlock.Address)) if (ifElseStructure.ElseBlock != null && !processedBlocks.Contains(ifElseStructure.ElseBlock.Address))
{ {
result.AppendLine($"{indent}else") result.AppendLine($"{indent}else");
.AppendLine($"{indent}{{") result.AppendLine($"{indent}{{");
.AppendLine($"{indent} // Else branch at 0x{ifElseStructure.ElseBlock.Address:X8}");
// Generate code for the else branch // Check if the 'else' branch contains a nested if-else structure (else-if)
GenerateBlockCode(function, ifElseStructure.ElseBlock, result, indentLevel + 1, processedBlocks); if (ifElseStructure.NestedElseStructure != null)
{
// Generate code for the nested if-else structure in the 'else' branch
GenerateIfElseCode(function, ifElseStructure.NestedElseStructure, result, indentLevel + 1, processedBlocks);
}
else
{
// Generate code for the 'else' branch normally
GenerateBlockCode(function, ifElseStructure.ElseBlock, result, indentLevel + 1, processedBlocks);
}
// Close the else branch // Close the 'else' branch
result.AppendLine($"{indent}}}"); result.AppendLine($"{indent}}}");
} }
// If this is a complete if-else structure with a merge point, and the merge point hasn't been processed yet
if (ifElseStructure.IsComplete && ifElseStructure.MergeBlock != null &&
!processedBlocks.Contains(ifElseStructure.MergeBlock.Address))
{
// Generate code for the merge block
GenerateBlockCode(function, ifElseStructure.MergeBlock, result, indentLevel, processedBlocks);
}
} }
/// <summary> /// <summary>
@ -387,9 +408,47 @@ public class PseudocodeGenerator
/// <returns>A string representing the condition expression</returns> /// <returns>A string representing the condition expression</returns>
private string GenerateConditionExpression(Function function, InstructionBlock conditionBlock)
{
    // An empty block has no jump to derive a condition from, so fall back to a placeholder
    if (conditionBlock.Instructions.Count == 0)
    {
        return "condition";
    }

    // The block terminator is expected to be the conditional jump
    var lastInstruction = conditionBlock.Instructions[^1];

    // Anything other than a conditional jump cannot be turned into a condition
    if (!IsConditionalJump(lastInstruction.Type))
    {
        return "condition";
    }

    // Search backwards from the instruction preceding the jump for the nearest CMP or TEST.
    // NOTE(review): the search does not stop at other instructions that may overwrite the
    // flags between the comparison and the jump - confirm whether that matters for the
    // blocks produced by the disassembler.
    Instruction? comparisonInstruction = null;
    for (int i = conditionBlock.Instructions.Count - 2; i >= 0; i--)
    {
        var candidate = conditionBlock.Instructions[i];
        if (candidate.Type == InstructionType.Cmp || candidate.Type == InstructionType.Test)
        {
            comparisonInstruction = candidate;
            break;
        }
    }

    // Without a usable two-operand comparison, describe the jump on its own
    if (comparisonInstruction == null || comparisonInstruction.StructuredOperands.Count < 2)
    {
        return GenerateConditionFromJump(lastInstruction, null, null);
    }

    var left = FormatOperand(comparisonInstruction.StructuredOperands[0]);
    var right = FormatOperand(comparisonInstruction.StructuredOperands[1]);

    if (comparisonInstruction.Type == InstructionType.Test)
    {
        // TEST sets the flags from (left & right), not from a subtraction, so the value
        // being examined is the bitwise AND compared against zero. When both operands
        // render identically (the "test reg, reg" idiom) the AND is just the operand itself.
        left = left == right ? left : $"({left} & {right})";
        right = "0";
    }

    // Generate the final condition from the jump type and the (normalised) operands
    return GenerateConditionFromJump(lastInstruction, left, right);
}
/// <summary> /// <summary>
@ -400,6 +459,21 @@ public class PseudocodeGenerator
/// <returns>The generated pseudocode</returns> /// <returns>The generated pseudocode</returns>
private string GenerateInstructionPseudocode(Function function, Instruction instruction) private string GenerateInstructionPseudocode(Function function, Instruction instruction)
{ {
// Check for special cases first
if (instruction.Type == InstructionType.Xor && instruction.StructuredOperands.Count >= 2)
{
var dest = instruction.StructuredOperands[0];
var src = instruction.StructuredOperands[1];
// Check for XOR with self (zeroing a register)
if (dest is RegisterOperand regDest && src is RegisterOperand regSrc &&
regDest.Register == regSrc.Register)
{
// This is a common idiom to zero a register
return $"{FormatOperand(dest)} = 0; // XOR with self to zero register";
}
}
// Handle different instruction types // Handle different instruction types
switch (instruction.Type) switch (instruction.Type)
{ {
@ -410,6 +484,12 @@ public class PseudocodeGenerator
var dest = instruction.StructuredOperands[0]; var dest = instruction.StructuredOperands[0];
var src = instruction.StructuredOperands[1]; var src = instruction.StructuredOperands[1];
// Special case for moving 0 (common initialization pattern)
if (src is ImmediateOperand immSrc && immSrc.Value == 0)
{
return $"{FormatOperand(dest)} = 0; // Initialize to zero";
}
return $"{FormatOperand(dest)} = {FormatOperand(src)};"; return $"{FormatOperand(dest)} = {FormatOperand(src)};";
} }
break; break;
@ -421,6 +501,12 @@ public class PseudocodeGenerator
var dest = instruction.StructuredOperands[0]; var dest = instruction.StructuredOperands[0];
var src = instruction.StructuredOperands[1]; var src = instruction.StructuredOperands[1];
// Special case for adding 1 (increment)
if (src is ImmediateOperand immSrc && immSrc.Value == 1)
{
return $"{FormatOperand(dest)}++; // Increment";
}
return $"{FormatOperand(dest)} += {FormatOperand(src)};"; return $"{FormatOperand(dest)} += {FormatOperand(src)};";
} }
break; break;
@ -432,6 +518,12 @@ public class PseudocodeGenerator
var dest = instruction.StructuredOperands[0]; var dest = instruction.StructuredOperands[0];
var src = instruction.StructuredOperands[1]; var src = instruction.StructuredOperands[1];
// Special case for subtracting 1 (decrement)
if (src is ImmediateOperand immSrc && immSrc.Value == 1)
{
return $"{FormatOperand(dest)}--; // Decrement";
}
return $"{FormatOperand(dest)} -= {FormatOperand(src)};"; return $"{FormatOperand(dest)} -= {FormatOperand(src)};";
} }
break; break;
@ -465,36 +557,39 @@ public class PseudocodeGenerator
var dest = instruction.StructuredOperands[0]; var dest = instruction.StructuredOperands[0];
var src = instruction.StructuredOperands[1]; var src = instruction.StructuredOperands[1];
// Special case: xor eax, eax is used to zero a register // We already handled the special case of XOR with self above
if (dest is RegisterOperand destReg && src is RegisterOperand srcReg &&
destReg.Register == srcReg.Register)
{
return $"{FormatOperand(dest)} = 0;";
}
return $"{FormatOperand(dest)} ^= {FormatOperand(src)};"; return $"{FormatOperand(dest)} ^= {FormatOperand(src)};";
} }
break; break;
case InstructionType.Test: case InstructionType.Test:
// Handle TEST instruction (used for condition testing) // Handle TEST instruction (no assignment, just sets flags)
if (instruction.StructuredOperands.Count >= 2) if (instruction.StructuredOperands.Count >= 2)
{ {
var op1 = instruction.StructuredOperands[0]; var left = instruction.StructuredOperands[0];
var op2 = instruction.StructuredOperands[1]; var right = instruction.StructuredOperands[1];
return $"// Test {FormatOperand(op1)} & {FormatOperand(op2)}"; // Special case for TEST with self (checking if a register is zero)
if (left is RegisterOperand regLeft && right is RegisterOperand regRight &&
regLeft.Register == regRight.Register)
{
return $"// Check if {FormatOperand(left)} is zero";
}
return $"// Test {FormatOperand(left)} & {FormatOperand(right)}";
} }
break; break;
case InstructionType.Cmp: case InstructionType.Cmp:
// Handle CMP instruction (used for condition testing) // Handle CMP instruction (no assignment, just sets flags)
if (instruction.StructuredOperands.Count >= 2) if (instruction.StructuredOperands.Count >= 2)
{ {
var op1 = instruction.StructuredOperands[0]; var left = instruction.StructuredOperands[0];
var op2 = instruction.StructuredOperands[1]; var right = instruction.StructuredOperands[1];
return $"// Compare {FormatOperand(op1)} with {FormatOperand(op2)}"; // For CMP, we'll return a comment that explains what's being compared
// This will help with understanding the following conditional jumps
return $"// Compare {FormatOperand(left)} with {FormatOperand(right)}";
} }
break; break;
@ -504,17 +599,21 @@ public class PseudocodeGenerator
{ {
var target = instruction.StructuredOperands[0]; var target = instruction.StructuredOperands[0];
return $"call({FormatOperand(target)});"; // For function calls, we'll generate a proper function call expression
return $"{FormatOperand(target)}(); // Function call";
} }
break; break;
case InstructionType.Ret:
// Handle RET instruction
return "return 0; // Placeholder return value";
case InstructionType.Push: case InstructionType.Push:
// Handle PUSH instruction // Handle PUSH instruction
if (instruction.StructuredOperands.Count >= 1) if (instruction.StructuredOperands.Count >= 1)
{ {
var value = instruction.StructuredOperands[0]; var src = instruction.StructuredOperands[0];
return $"// Push {FormatOperand(src)} onto stack";
return $"push({FormatOperand(value)});";
} }
break; break;
@ -523,14 +622,64 @@ public class PseudocodeGenerator
if (instruction.StructuredOperands.Count >= 1) if (instruction.StructuredOperands.Count >= 1)
{ {
var dest = instruction.StructuredOperands[0]; var dest = instruction.StructuredOperands[0];
return $"{FormatOperand(dest)} = pop(); // Pop from stack";
return $"{FormatOperand(dest)} = pop();";
} }
break; break;
case InstructionType.Inc:
// Handle INC instruction
if (instruction.StructuredOperands.Count >= 1)
{
var dest = instruction.StructuredOperands[0];
return $"{FormatOperand(dest)}++;";
}
break;
case InstructionType.Dec:
// Handle DEC instruction
if (instruction.StructuredOperands.Count >= 1)
{
var dest = instruction.StructuredOperands[0];
return $"{FormatOperand(dest)}--;";
}
break;
case InstructionType.Shl:
// Handle SHL/SAL instruction (shift left)
if (instruction.StructuredOperands.Count >= 2)
{
var dest = instruction.StructuredOperands[0];
var count = instruction.StructuredOperands[1];
return $"{FormatOperand(dest)} <<= {FormatOperand(count)};";
}
break;
case InstructionType.Shr:
// Handle SHR instruction (shift right logical)
if (instruction.StructuredOperands.Count >= 2)
{
var dest = instruction.StructuredOperands[0];
var count = instruction.StructuredOperands[1];
return $"{FormatOperand(dest)} >>>= {FormatOperand(count)}; // Logical shift right";
}
break;
case InstructionType.Sar:
// Handle SAR instruction (shift right arithmetic)
if (instruction.StructuredOperands.Count >= 2)
{
var dest = instruction.StructuredOperands[0];
var count = instruction.StructuredOperands[1];
return $"{FormatOperand(dest)} >>= {FormatOperand(count)}; // Arithmetic shift right";
}
break;
default:
// For other instructions, just add a comment
return $"// {instruction}";
} }
// If we couldn't generate pseudocode, return a comment with the instruction return string.Empty;
return $"/* {instruction} */";
} }
/// <summary> /// <summary>
@ -654,49 +803,28 @@ public class PseudocodeGenerator
/// Generates a condition expression based on a conditional jump instruction /// Generates a condition expression based on a conditional jump instruction
/// </summary> /// </summary>
/// <param name="instruction">The conditional jump instruction</param> /// <param name="instruction">The conditional jump instruction</param>
/// <param name="left">The left operand of the comparison, if available</param>
/// <param name="right">The right operand of the comparison, if available</param>
/// <returns>A string representing the condition expression</returns> /// <returns>A string representing the condition expression</returns>
private string GenerateConditionFromJump(Instruction instruction, string? left = null, string? right = null)
{
    // Without both comparison operands only the flag state can be described
    if (left is null || right is null)
    {
        return instruction.Type switch
        {
            InstructionType.Jz => "zero flag is set",
            InstructionType.Jnz => "zero flag is not set",
            _ => "condition"
        };
    }

    // Identical operands are treated as the "test reg, reg" idiom, which sets the flags
    // exactly like a comparison of the operand against zero. A literal "cmp x, x" would
    // also land here, but that form carries no useful condition anyway.
    string rhs = left == right ? "0" : right;

    // Map each conditional jump to the relational operator that holds when the jump is
    // taken after "cmp left, right". Unsigned variants are tagged with a comment because
    // the pseudocode has no type information to express signedness.
    return instruction.Type switch
    {
        InstructionType.Jz => $"{left} == {rhs}",                    // ZF=1
        InstructionType.Jnz => $"{left} != {rhs}",                   // ZF=0
        InstructionType.Jg => $"{left} > {rhs}",                     // ZF=0 and SF=OF
        InstructionType.Jge => $"{left} >= {rhs}",                   // SF=OF
        InstructionType.Jl => $"{left} < {rhs}",                     // SF!=OF
        InstructionType.Jle => $"{left} <= {rhs}",                   // ZF=1 or SF!=OF
        InstructionType.Ja => $"{left} > {rhs} /* unsigned */",      // CF=0 and ZF=0
        InstructionType.Jae => $"{left} >= {rhs} /* unsigned */",    // CF=0
        InstructionType.Jb => $"{left} < {rhs} /* unsigned */",      // CF=1
        InstructionType.Jbe => $"{left} <= {rhs} /* unsigned */",    // CF=1 or ZF=1
        // Unknown jump types keep the generic placeholder form
        _ => $"{left} ? {right}"
    };
}
} }