diff --git a/X86Disassembler/Analysers/ControlFlowAnalyzer.cs b/X86Disassembler/Analysers/ControlFlowAnalyzer.cs index 710e949..759d125 100644 --- a/X86Disassembler/Analysers/ControlFlowAnalyzer.cs +++ b/X86Disassembler/Analysers/ControlFlowAnalyzer.cs @@ -42,7 +42,7 @@ public class ControlFlowAnalyzer /// The function to analyze private void IdentifyIfElseStructures(Function function) { - // For each block in the function + // First pass: identify basic if-else structures foreach (var block in function.AsmFunction.Blocks) { // Skip blocks that don't end with a conditional jump @@ -70,16 +70,84 @@ public class ControlFlowAnalyzer var fallthroughBlock = FindFallthroughBlock(block); if (fallthroughBlock != null) - { - // Store the if-else structure in the context - var ifElseStructure = new IfElseStructure + { + // Check if the fallthrough block ends with an unconditional jump + // This could indicate an if-else structure where the 'else' branch jumps to a common merge point + InstructionBlock? mergeBlock = null; + bool hasElseBlock = true; + + if (fallthroughBlock.Instructions.Count > 0 && + fallthroughBlock.Instructions[^1].Type == InstructionType.Jmp) + { + // Get the jump target address + ulong mergeAddress = GetJumpTargetAddress(fallthroughBlock.Instructions[^1]); + + // Find the merge block + if (_context.BlocksByAddress.TryGetValue(mergeAddress, out var potentialMergeBlock)) + { + mergeBlock = potentialMergeBlock; + } + } + + // Check if the 'then' block also jumps to the same merge point + if (mergeBlock != null && targetBlock.Instructions.Count > 0 && + targetBlock.Instructions[^1].Type == InstructionType.Jmp) + { + ulong thenJumpAddress = GetJumpTargetAddress(targetBlock.Instructions[^1]); + + if (thenJumpAddress == mergeBlock.Address) + { + // We have a classic if-else structure with a merge point + // Store the if-else structure in the context + var ifElseStructure = new IfElseStructure + { + ConditionBlock = block, + ThenBlock = targetBlock, + ElseBlock = fallthroughBlock, + MergeBlock = mergeBlock, + IsComplete = true // Both branches merge back + }; + + _context.StoreAnalysisData(block.Address, "IfElseStructure", ifElseStructure); + continue; + } + } + + // If we get here, we have a simple if-then or if-then-else without a clear merge point + var simpleIfStructure = new IfElseStructure { ConditionBlock = block, ThenBlock = targetBlock, - ElseBlock = fallthroughBlock + ElseBlock = hasElseBlock ? fallthroughBlock : null, + IsComplete = false // No clear merge point }; - _context.StoreAnalysisData(block.Address, "IfElseStructure", ifElseStructure); + _context.StoreAnalysisData(block.Address, "IfElseStructure", simpleIfStructure); + } + } + } + } + + // Second pass: identify nested if-else structures + foreach (var block in function.AsmFunction.Blocks) + { + var ifElseStructure = _context.GetAnalysisData(block.Address, "IfElseStructure"); + if (ifElseStructure != null) + { + // Check if the 'then' block contains another if-else structure + var nestedThenIf = _context.GetAnalysisData(ifElseStructure.ThenBlock.Address, "IfElseStructure"); + if (nestedThenIf != null) + { + ifElseStructure.NestedThenStructure = nestedThenIf; + } + + // Check if the 'else' block contains another if-else structure + if (ifElseStructure.ElseBlock != null) + { + var nestedElseIf = _context.GetAnalysisData(ifElseStructure.ElseBlock.Address, "IfElseStructure"); + if (nestedElseIf != null) + { + ifElseStructure.NestedElseStructure = nestedElseIf; } } } @@ -233,14 +301,34 @@ public class ControlFlowAnalyzer public InstructionBlock ConditionBlock { get; set; } = null!; /// - /// The block containing the 'then' branch + /// The block representing the 'then' branch (taken when condition is true) /// public InstructionBlock ThenBlock { get; set; } = null!; /// - /// The block containing the 'else' branch (may be null for if-then structures) + /// The block representing the 'else' branch (taken when condition is false) /// - public InstructionBlock ElseBlock { get; set; } = null!; + public InstructionBlock? ElseBlock { get; set; } + + /// + /// The block where both branches merge back together (if applicable) + /// + public InstructionBlock? MergeBlock { get; set; } + + /// + /// Whether this is a complete if-else structure with a merge point + /// + public bool IsComplete { get; set; } + + /// + /// Nested if-else structure in the 'then' branch (if any) + /// + public IfElseStructure? NestedThenStructure { get; set; } + + /// + /// Nested if-else structure in the 'else' branch (if any) + /// + public IfElseStructure? NestedElseStructure { get; set; } } /// diff --git a/X86Disassembler/Analysers/PseudocodeGenerator.cs b/X86Disassembler/Analysers/PseudocodeGenerator.cs index 391c980..18a6af5 100644 --- a/X86Disassembler/Analysers/PseudocodeGenerator.cs +++ b/X86Disassembler/Analysers/PseudocodeGenerator.cs @@ -273,32 +273,53 @@ public class PseudocodeGenerator // Add the if statement string indent = new string(' ', indentLevel * 4); - result.AppendLine($"{indent}// If-else structure at 0x{ifElseStructure.ConditionBlock.Address:X8}") - .AppendLine($"{indent}if ({condition})"); + result.AppendLine($"{indent}if ({condition})"); + result.AppendLine($"{indent}{{"); - // Add the then branch - result.AppendLine($"{indent}{{") - .AppendLine($"{indent} // Then branch at 0x{ifElseStructure.ThenBlock.Address:X8}"); + // Check if the 'then' branch contains a nested if-else structure + if (ifElseStructure.NestedThenStructure != null) + { + // Generate code for the nested if-else structure in the 'then' branch + GenerateIfElseCode(function, ifElseStructure.NestedThenStructure, result, indentLevel + 1, processedBlocks); + } + else + { + // Generate code for the 'then' branch normally + GenerateBlockCode(function, ifElseStructure.ThenBlock, result, indentLevel + 1, processedBlocks); + } - // Generate code for the then branch - GenerateBlockCode(function, ifElseStructure.ThenBlock, result, indentLevel + 1, processedBlocks); - - // Close the then branch + // Close the 'then' branch result.AppendLine($"{indent}}}"); - // Add the else branch if it exists and is not already processed + // Add the 'else' branch if it exists and is not already processed if (ifElseStructure.ElseBlock != null && !processedBlocks.Contains(ifElseStructure.ElseBlock.Address)) { - result.AppendLine($"{indent}else") - .AppendLine($"{indent}{{") - .AppendLine($"{indent} // Else branch at 0x{ifElseStructure.ElseBlock.Address:X8}"); + result.AppendLine($"{indent}else"); + result.AppendLine($"{indent}{{"); - // Generate code for the else branch - GenerateBlockCode(function, ifElseStructure.ElseBlock, result, indentLevel + 1, processedBlocks); + // Check if the 'else' branch contains a nested if-else structure (else-if) + if (ifElseStructure.NestedElseStructure != null) + { + // Generate code for the nested if-else structure in the 'else' branch + GenerateIfElseCode(function, ifElseStructure.NestedElseStructure, result, indentLevel + 1, processedBlocks); + } + else + { + // Generate code for the 'else' branch normally + GenerateBlockCode(function, ifElseStructure.ElseBlock, result, indentLevel + 1, processedBlocks); + } - // Close the else branch + // Close the 'else' branch result.AppendLine($"{indent}}}"); } + + // If this is a complete if-else structure with a merge point, and the merge point hasn't been processed yet + if (ifElseStructure.IsComplete && ifElseStructure.MergeBlock != null && + !processedBlocks.Contains(ifElseStructure.MergeBlock.Address)) + { + // Generate code for the merge block + GenerateBlockCode(function, ifElseStructure.MergeBlock, result, indentLevel, processedBlocks); + } } /// @@ -387,9 +408,47 @@ public class PseudocodeGenerator /// A string representing the condition expression private string GenerateConditionExpression(Function function, InstructionBlock conditionBlock) { - // For now, we'll just return a placeholder - // In a real implementation, we would analyze the instructions to determine the condition - return "/* condition */"; + // If the block is empty, return a placeholder + if (conditionBlock.Instructions.Count == 0) + { + return "condition"; + } + + // Get the last instruction (should be a conditional jump) + var lastInstruction = conditionBlock.Instructions[^1]; + + // If it's not a conditional jump, return a placeholder + if (!IsConditionalJump(lastInstruction.Type)) + { + return "condition"; + } + + // Look for a CMP or TEST instruction that sets the flags for this jump + Instruction? comparisonInstruction = null; + + // Search backwards from the jump instruction to find a comparison + for (int i = conditionBlock.Instructions.Count - 2; i >= 0; i--) + { + var instruction = conditionBlock.Instructions[i]; + if (instruction.Type == InstructionType.Cmp || instruction.Type == InstructionType.Test) + { + comparisonInstruction = instruction; + break; + } + } + + // If we found a comparison instruction, generate a condition based on it and the jump + if (comparisonInstruction != null && comparisonInstruction.StructuredOperands.Count >= 2) + { + var left = FormatOperand(comparisonInstruction.StructuredOperands[0]); + var right = FormatOperand(comparisonInstruction.StructuredOperands[1]); + + // Generate condition based on jump type + return GenerateConditionFromJump(lastInstruction, left, right); + } + + // If we couldn't find a comparison instruction, just use the jump condition + return GenerateConditionFromJump(lastInstruction, null, null); } /// @@ -400,6 +459,21 @@ public class PseudocodeGenerator /// The generated pseudocode private string GenerateInstructionPseudocode(Function function, Instruction instruction) { + // Check for special cases first + if (instruction.Type == InstructionType.Xor && instruction.StructuredOperands.Count >= 2) + { + var dest = instruction.StructuredOperands[0]; + var src = instruction.StructuredOperands[1]; + + // Check for XOR with self (zeroing a register) + if (dest is RegisterOperand regDest && src is RegisterOperand regSrc && + regDest.Register == regSrc.Register) + { + // This is a common idiom to zero a register + return $"{FormatOperand(dest)} = 0; // XOR with self to zero register"; + } + } + // Handle different instruction types switch (instruction.Type) { @@ -410,6 +484,12 @@ public class PseudocodeGenerator var dest = instruction.StructuredOperands[0]; var src = instruction.StructuredOperands[1]; + // Special case for moving 0 (common initialization pattern) + if (src is ImmediateOperand immSrc && immSrc.Value == 0) + { + return $"{FormatOperand(dest)} = 0; // Initialize to zero"; + } + return $"{FormatOperand(dest)} = {FormatOperand(src)};"; } break; @@ -421,6 +501,12 @@ public class PseudocodeGenerator var dest = instruction.StructuredOperands[0]; var src = instruction.StructuredOperands[1]; + // Special case for adding 1 (increment) + if (src is ImmediateOperand immSrc && immSrc.Value == 1) + { + return $"{FormatOperand(dest)}++; // Increment"; + } + return $"{FormatOperand(dest)} += {FormatOperand(src)};"; } break; @@ -432,6 +518,12 @@ public class PseudocodeGenerator var dest = instruction.StructuredOperands[0]; var src = instruction.StructuredOperands[1]; + // Special case for subtracting 1 (decrement) + if (src is ImmediateOperand immSrc && immSrc.Value == 1) + { + return $"{FormatOperand(dest)}--; // Decrement"; + } + return $"{FormatOperand(dest)} -= {FormatOperand(src)};"; } break; @@ -446,7 +538,7 @@ public class PseudocodeGenerator return $"{FormatOperand(dest)} &= {FormatOperand(src)};"; } break; - + case InstructionType.Or: // Handle OR instruction if (instruction.StructuredOperands.Count >= 2) @@ -465,36 +557,39 @@ public class PseudocodeGenerator var dest = instruction.StructuredOperands[0]; var src = instruction.StructuredOperands[1]; - // Special case: xor eax, eax is used to zero a register - if (dest is RegisterOperand destReg && src is RegisterOperand srcReg && - destReg.Register == srcReg.Register) - { - return $"{FormatOperand(dest)} = 0;"; - } - + // We already handled the special case of XOR with self above return $"{FormatOperand(dest)} ^= {FormatOperand(src)};"; } break; case InstructionType.Test: - // Handle TEST instruction (used for condition testing) + // Handle TEST instruction (no assignment, just sets flags) if (instruction.StructuredOperands.Count >= 2) { - var op1 = instruction.StructuredOperands[0]; - var op2 = instruction.StructuredOperands[1]; + var left = instruction.StructuredOperands[0]; + var right = instruction.StructuredOperands[1]; - return $"// Test {FormatOperand(op1)} & {FormatOperand(op2)}"; + // Special case for TEST with self (checking if a register is zero) + if (left is RegisterOperand regLeft && right is RegisterOperand regRight && + regLeft.Register == regRight.Register) + { + return $"// Check if {FormatOperand(left)} is zero"; + } + + return $"// Test {FormatOperand(left)} & {FormatOperand(right)}"; } break; case InstructionType.Cmp: - // Handle CMP instruction (used for condition testing) + // Handle CMP instruction (no assignment, just sets flags) if (instruction.StructuredOperands.Count >= 2) { - var op1 = instruction.StructuredOperands[0]; - var op2 = instruction.StructuredOperands[1]; + var left = instruction.StructuredOperands[0]; + var right = instruction.StructuredOperands[1]; - return $"// Compare {FormatOperand(op1)} with {FormatOperand(op2)}"; + // For CMP, we'll return a comment that explains what's being compared + // This will help with understanding the following conditional jumps + return $"// Compare {FormatOperand(left)} with {FormatOperand(right)}"; } break; @@ -504,17 +599,21 @@ public class PseudocodeGenerator { var target = instruction.StructuredOperands[0]; - return $"call({FormatOperand(target)});"; + // For function calls, we'll generate a proper function call expression + return $"{FormatOperand(target)}(); // Function call"; } break; + case InstructionType.Ret: + // Handle RET instruction + return "return 0; // Placeholder return value"; + case InstructionType.Push: // Handle PUSH instruction if (instruction.StructuredOperands.Count >= 1) { - var value = instruction.StructuredOperands[0]; - - return $"push({FormatOperand(value)});"; + var src = instruction.StructuredOperands[0]; + return $"// Push {FormatOperand(src)} onto stack"; } break; @@ -523,14 +622,64 @@ public class PseudocodeGenerator if (instruction.StructuredOperands.Count >= 1) { var dest = instruction.StructuredOperands[0]; - - return $"{FormatOperand(dest)} = pop();"; + return $"{FormatOperand(dest)} = pop(); // Pop from stack"; } break; + + case InstructionType.Inc: + // Handle INC instruction + if (instruction.StructuredOperands.Count >= 1) + { + var dest = instruction.StructuredOperands[0]; + return $"{FormatOperand(dest)}++;"; + } + break; + + case InstructionType.Dec: + // Handle DEC instruction + if (instruction.StructuredOperands.Count >= 1) + { + var dest = instruction.StructuredOperands[0]; + return $"{FormatOperand(dest)}--;"; + } + break; + + case InstructionType.Shl: + // Handle SHL/SAL instruction (shift left) + if (instruction.StructuredOperands.Count >= 2) + { + var dest = instruction.StructuredOperands[0]; + var count = instruction.StructuredOperands[1]; + return $"{FormatOperand(dest)} <<= {FormatOperand(count)};"; + } + break; + + case InstructionType.Shr: + // Handle SHR instruction (shift right logical) + if (instruction.StructuredOperands.Count >= 2) + { + var dest = instruction.StructuredOperands[0]; + var count = instruction.StructuredOperands[1]; + return $"{FormatOperand(dest)} >>>= {FormatOperand(count)}; // Logical shift right"; + } + break; + + case InstructionType.Sar: + // Handle SAR instruction (shift right arithmetic) + if (instruction.StructuredOperands.Count >= 2) + { + var dest = instruction.StructuredOperands[0]; + var count = instruction.StructuredOperands[1]; + return $"{FormatOperand(dest)} >>= {FormatOperand(count)}; // Arithmetic shift right"; + } + break; + + default: + // For other instructions, just add a comment + return $"// {instruction}"; } - // If we couldn't generate pseudocode, return a comment with the instruction - return $"/* {instruction} */"; + return string.Empty; } /// @@ -654,49 +803,28 @@ public class PseudocodeGenerator /// Generates a condition expression based on a conditional jump instruction /// /// The conditional jump instruction + /// The left operand of the comparison, if available + /// The right operand of the comparison, if available /// A string representing the condition expression - private string GenerateConditionFromJump(Instruction instruction) + private string GenerateConditionFromJump(Instruction instruction, string? left = null, string? right = null) { - // Map jump types to their equivalent C-like conditions - // Note: These are inverted because the jump is taken when the condition is true, - // but in C-like code, the condition is for the 'if' statement + // If we don't have comparison operands, use a generic condition + if (left == null || right == null) + { + switch (instruction.Type) + { + case InstructionType.Jz: return "zero flag is set"; + case InstructionType.Jnz: return "zero flag is not set"; + default: return "condition"; + } + } + + // If we have comparison operands, generate a more specific condition switch (instruction.Type) { - case InstructionType.Jz: // Jump if Zero (ZF=1) - return "condition == 0"; - - case InstructionType.Jnz: // Jump if Not Zero (ZF=0) - return "condition != 0"; - - case InstructionType.Jg: // Jump if Greater (ZF=0 and SF=OF) - return "condition > 0"; - - case InstructionType.Jge: // Jump if Greater or Equal (SF=OF) - return "condition >= 0"; - - case InstructionType.Jl: // Jump if Less (SF!=OF) - return "condition < 0"; - - case InstructionType.Jle: // Jump if Less or Equal (ZF=1 or SF!=OF) - return "condition <= 0"; - - case InstructionType.Ja: // Jump if Above (CF=0 and ZF=0) - return "condition > 0 /* unsigned */"; - - case InstructionType.Jae: // Jump if Above or Equal (CF=0) - return "condition >= 0 /* unsigned */"; - - case InstructionType.Jb: // Jump if Below (CF=1) - return "condition < 0 /* unsigned */"; - - case InstructionType.Jbe: // Jump if Below or Equal (CF=1 or ZF=1) - return "condition <= 0 /* unsigned */"; - - // Add more cases for other conditional jumps as needed - - default: - // For unknown jump types, use a generic condition - return "/* unknown condition */"; + case InstructionType.Jz: return $"{left} == 0"; + case InstructionType.Jnz: return $"{left} != 0"; + default: return $"{left} ? {right}"; } } }