using System.Text; using X86Disassembler.Analysers.DecompilerTypes; using X86Disassembler.X86; using X86Disassembler.X86.Operands; namespace X86Disassembler.Analysers; /// /// Generates C-like pseudocode from decompiled functions /// public class PseudocodeGenerator { /// /// Generates pseudocode for a decompiled function /// /// The function to generate pseudocode for /// The generated pseudocode public string GeneratePseudocode(Function function) { var result = new StringBuilder(); // Add function signature result.AppendLine($"{function.ReturnType} {function.Name}({string.Join(", ", function.Parameters.Select(p => $"{p.Type} {p.Name}"))})") .AppendLine("{"); // Add local variable declarations foreach (var localVar in function.LocalVariables) { result.AppendLine($" {localVar.Type} {localVar.Name}; // Stack offset: {localVar.StackOffset}"); } // Add register variable declarations foreach (var regVar in function.RegisterVariables) { result.AppendLine($" {regVar.Type} {regVar.Name}; // Register: {RegisterMapper.GetRegisterName(regVar.Register!.Value, 32)}"); } if (function.LocalVariables.Count > 0 || function.RegisterVariables.Count > 0) { result.AppendLine(); } // Generate the function body using control flow analysis GenerateFunctionBody(function, result, 1); // Add a return statement result.AppendLine() .AppendLine(" return 0; // Placeholder return value") .AppendLine("}"); return result.ToString(); } /// /// Generates the body of the function using control flow analysis /// /// The function to generate code for /// The string builder to append to /// The current indentation level private void GenerateFunctionBody(Function function, StringBuilder result, int indentLevel) { // Try to find the entry block var entryBlock = function.AsmFunction.EntryBlock; // If the entry block is not found, try to find a block with an address that matches the function address minus the base address if (entryBlock == null && function.AsmFunction.Blocks.Count > 0) { // Get the first block as a fallback entryBlock = function.AsmFunction.Blocks[0]; // Log a warning but continue with the first block result.AppendLine($"{new string(' ', indentLevel * 4)}// Warning: Entry block not found at address 0x{function.Address:X8}, using first block at 0x{entryBlock.Address:X8}"); } else if (entryBlock == null) { result.AppendLine($"{new string(' ', indentLevel * 4)}// Function body could not be decompiled - no blocks found"); return; } // Process blocks in order, starting from the entry block var processedBlocks = new HashSet(); GenerateBlockCode(function, entryBlock, result, indentLevel, processedBlocks); } /// /// Generates code for a basic block and its successors /// /// The function containing the block /// The block to generate code for /// The string builder to append to /// The current indentation level /// Set of blocks that have already been processed private void GenerateBlockCode(Function function, InstructionBlock block, StringBuilder result, int indentLevel, HashSet processedBlocks) { // Check if we've already processed this block if (processedBlocks.Contains(block.Address)) { return; } // Mark this block as processed processedBlocks.Add(block.Address); // Check if this block is part of a control flow structure var context = function.AsmFunction.Context; // Check for if-else structure var ifElseStructure = context.GetAnalysisData(block.Address, "IfElseStructure"); if (ifElseStructure != null && ifElseStructure.ConditionBlock.Address == block.Address) { // This block is the condition of an if-else structure GenerateIfElseCode(function, ifElseStructure, result, indentLevel, processedBlocks); return; } // Check for switch structure var switchStructure = context.GetAnalysisData(block.Address, "SwitchStructure"); if (switchStructure != null && switchStructure.HeaderBlock.Address == block.Address) { // This block is the header of a switch structure GenerateSwitchCode(function, switchStructure, result, indentLevel, processedBlocks); return; } // Check if this block is part of a loop var loops = context.LoopsByBlockAddress.TryGetValue(block.Address, out var blockLoops) ? blockLoops : null; if (loops != null && loops.Count > 0) { // Get the innermost loop var loop = loops[0]; // Check if this is the loop header if (loop.Header.Address == block.Address) { // This block is the header of a loop GenerateLoopCode(function, loop, result, indentLevel, processedBlocks); return; } } // If we get here, this is a regular block GenerateRegularBlockCode(function, block, result, indentLevel, processedBlocks); } /// /// Generates code for a regular basic block /// /// The function containing the block /// The block to generate code for /// The string builder to append to /// The current indentation level /// Set of blocks that have already been processed private void GenerateRegularBlockCode(Function function, InstructionBlock block, StringBuilder result, int indentLevel, HashSet processedBlocks) { // Add a comment with the block address result.AppendLine($"{new string(' ', indentLevel * 4)}// Block at 0x{block.Address:X8}"); // Check if this block ends with a conditional jump bool hasConditionalJump = block.Instructions.Count > 0 && IsConditionalJump(block.Instructions[^1].Type); // If this block has a conditional jump but wasn't detected as an if-else structure, // we'll create an inline if statement for better readability if (hasConditionalJump && block.Successors.Count == 2) { // Get the last instruction (conditional jump) var jumpInstruction = block.Instructions[^1]; // Generate condition based on the jump type string condition = GenerateConditionFromJump(jumpInstruction); // Generate code for all instructions except the last one (the jump) for (int i = 0; i < block.Instructions.Count - 1; i++) { var instruction = block.Instructions[i]; // Skip prologue/epilogue instructions if (IsPrologueOrEpilogueInstruction(instruction)) { continue; } // Generate pseudocode for this instruction var pseudocode = GenerateInstructionPseudocode(function, instruction); if (!string.IsNullOrEmpty(pseudocode)) { result.AppendLine($"{new string(' ', indentLevel * 4)}{pseudocode}"); } else { // If we couldn't generate pseudocode, add the instruction as a comment result.AppendLine($"{new string(' ', indentLevel * 4)}/* {instruction} */;"); } } // Generate the if statement result.AppendLine($"{new string(' ', indentLevel * 4)}if ({condition})"); result.AppendLine($"{new string(' ', indentLevel * 4)}{{"); // Find the target block (true branch) var targetAddress = GetJumpTargetAddress(jumpInstruction); var targetBlock = block.Successors.FirstOrDefault(s => s.Address == targetAddress); if (targetBlock != null) { // Generate code for the target block GenerateBlockCode(function, targetBlock, result, indentLevel + 1, processedBlocks); } result.AppendLine($"{new string(' ', indentLevel * 4)}}}"); // Find the fallthrough block (false branch) var fallthroughBlock = block.Successors.FirstOrDefault(s => s.Address != targetAddress); if (fallthroughBlock != null && !processedBlocks.Contains(fallthroughBlock.Address)) { // Generate code for the fallthrough block GenerateBlockCode(function, fallthroughBlock, result, indentLevel, processedBlocks); } } else { // Regular block processing // Generate code for each instruction in the block foreach (var instruction in block.Instructions) { // Skip prologue/epilogue instructions if (IsPrologueOrEpilogueInstruction(instruction)) { continue; } // Generate pseudocode for this instruction var pseudocode = GenerateInstructionPseudocode(function, instruction); if (!string.IsNullOrEmpty(pseudocode)) { result.AppendLine($"{new string(' ', indentLevel * 4)}{pseudocode}"); } else { // If we couldn't generate pseudocode, add the instruction as a comment result.AppendLine($"{new string(' ', indentLevel * 4)}/* {instruction} */;"); } } // Process successors in order foreach (var successor in block.Successors) { // Only process successors that haven't been processed yet if (!processedBlocks.Contains(successor.Address)) { GenerateBlockCode(function, successor, result, indentLevel, processedBlocks); } } } } /// /// Generates code for an if-else structure /// /// The function containing the structure /// The if-else structure to generate code for /// The string builder to append to /// The current indentation level /// Set of blocks that have already been processed private void GenerateIfElseCode(Function function, ControlFlowAnalyzer.IfElseStructure ifElseStructure, StringBuilder result, int indentLevel, HashSet processedBlocks) { // Mark the condition block as processed processedBlocks.Add(ifElseStructure.ConditionBlock.Address); // Generate the condition expression string condition = GenerateConditionExpression(function, ifElseStructure.ConditionBlock); // Add the if statement string indent = new string(' ', indentLevel * 4); result.AppendLine($"{indent}if ({condition})"); result.AppendLine($"{indent}{{"); // Check if the 'then' branch contains a nested if-else structure if (ifElseStructure.NestedThenStructure != null) { // Generate code for the nested if-else structure in the 'then' branch GenerateIfElseCode(function, ifElseStructure.NestedThenStructure, result, indentLevel + 1, processedBlocks); } else { // Generate code for the 'then' branch normally GenerateBlockCode(function, ifElseStructure.ThenBlock, result, indentLevel + 1, processedBlocks); } // Close the 'then' branch result.AppendLine($"{indent}}}"); // Add the 'else' branch if it exists and is not already processed if (ifElseStructure.ElseBlock != null && !processedBlocks.Contains(ifElseStructure.ElseBlock.Address)) { result.AppendLine($"{indent}else"); result.AppendLine($"{indent}{{"); // Check if the 'else' branch contains a nested if-else structure (else-if) if (ifElseStructure.NestedElseStructure != null) { // Generate code for the nested if-else structure in the 'else' branch GenerateIfElseCode(function, ifElseStructure.NestedElseStructure, result, indentLevel + 1, processedBlocks); } else { // Generate code for the 'else' branch normally GenerateBlockCode(function, ifElseStructure.ElseBlock, result, indentLevel + 1, processedBlocks); } // Close the 'else' branch result.AppendLine($"{indent}}}"); } // If this is a complete if-else structure with a merge point, and the merge point hasn't been processed yet if (ifElseStructure.IsComplete && ifElseStructure.MergeBlock != null && !processedBlocks.Contains(ifElseStructure.MergeBlock.Address)) { // Generate code for the merge block GenerateBlockCode(function, ifElseStructure.MergeBlock, result, indentLevel, processedBlocks); } } /// /// Generates code for a switch structure /// /// The function containing the structure /// The switch structure to generate code for /// The string builder to append to /// The current indentation level /// Set of blocks that have already been processed private void GenerateSwitchCode(Function function, ControlFlowAnalyzer.SwitchStructure switchStructure, StringBuilder result, int indentLevel, HashSet processedBlocks) { // Mark the header block as processed processedBlocks.Add(switchStructure.HeaderBlock.Address); // Generate the switch expression string switchExpr = "/* switch expression */"; // Add the switch statement string indent = new string(' ', indentLevel * 4); result.AppendLine($"{indent}// Switch structure at 0x{switchStructure.HeaderBlock.Address:X8}") .AppendLine($"{indent}switch ({switchExpr})"); // Add the switch body result.AppendLine($"{indent}{{") .AppendLine(); // Generate code for each case foreach (var switchCase in switchStructure.Cases) { // Add the case label result.AppendLine($"{indent} case {switchCase.Value}:") .AppendLine($"{indent} // Case block at 0x{switchCase.CaseBlock.Address:X8}"); // Generate code for the case block GenerateBlockCode(function, switchCase.CaseBlock, result, indentLevel + 2, processedBlocks); // Add a break statement result.AppendLine($"{indent} break;") .AppendLine(); } // Add a default case result.AppendLine($"{indent} default:") .AppendLine($"{indent} // Default case") .AppendLine($"{indent} break;"); // Close the switch body result.AppendLine($"{indent}}}"); } /// /// Generates code for a loop structure /// /// The function containing the structure /// The loop to generate code for /// The string builder to append to /// The current indentation level /// Set of blocks that have already been processed private void GenerateLoopCode(Function function, AnalyzerContext.Loop loop, StringBuilder result, int indentLevel, HashSet processedBlocks) { // Mark the header block as processed processedBlocks.Add(loop.Header.Address); // Add the loop header string indent = new string(' ', indentLevel * 4); result.AppendLine($"{indent}// Loop at 0x{loop.Header.Address:X8}") .AppendLine($"{indent}while (true) // Simplified loop condition"); // Add the loop body result.AppendLine($"{indent}{{") .AppendLine($"{indent} // Loop body"); // Generate code for the loop body (starting with the header) GenerateBlockCode(function, loop.Header, result, indentLevel + 1, processedBlocks); // Close the loop body result.AppendLine($"{indent}}}"); } /// /// Generates a condition expression for an if statement /// /// The function containing the block /// The block containing the condition /// A string representing the condition expression private string GenerateConditionExpression(Function function, InstructionBlock conditionBlock) { // If the block is empty, return a placeholder if (conditionBlock.Instructions.Count == 0) { return "condition"; } // Get the last instruction (should be a conditional jump) var lastInstruction = conditionBlock.Instructions[^1]; // If it's not a conditional jump, return a placeholder if (!IsConditionalJump(lastInstruction.Type)) { return "condition"; } // Look for a CMP or TEST instruction that sets the flags for this jump Instruction? comparisonInstruction = null; // Search backwards from the jump instruction to find a comparison for (int i = conditionBlock.Instructions.Count - 2; i >= 0; i--) { var instruction = conditionBlock.Instructions[i]; if (instruction.Type == InstructionType.Cmp || instruction.Type == InstructionType.Test) { comparisonInstruction = instruction; break; } } // If we found a comparison instruction, generate a condition based on it and the jump if (comparisonInstruction != null && comparisonInstruction.StructuredOperands.Count >= 2) { var left = FormatOperand(comparisonInstruction.StructuredOperands[0]); var right = FormatOperand(comparisonInstruction.StructuredOperands[1]); // Generate condition based on jump type return GenerateConditionFromJump(lastInstruction, left, right); } // If we couldn't find a comparison instruction, just use the jump condition return GenerateConditionFromJump(lastInstruction, null, null); } /// /// Generates pseudocode for a single instruction /// /// The function containing the instruction /// The instruction to generate pseudocode for /// The generated pseudocode private string GenerateInstructionPseudocode(Function function, Instruction instruction) { // Check for special cases first if (instruction.Type == InstructionType.Xor && instruction.StructuredOperands.Count >= 2) { var dest = instruction.StructuredOperands[0]; var src = instruction.StructuredOperands[1]; // Check for XOR with self (zeroing a register) if (dest is RegisterOperand regDest && src is RegisterOperand regSrc && regDest.Register == regSrc.Register) { // This is a common idiom to zero a register return $"{FormatOperand(dest)} = 0; // XOR with self to zero register"; } } // Handle different instruction types switch (instruction.Type) { case InstructionType.Mov: // Handle MOV instruction if (instruction.StructuredOperands.Count >= 2) { var dest = instruction.StructuredOperands[0]; var src = instruction.StructuredOperands[1]; // Special case for moving 0 (common initialization pattern) if (src is ImmediateOperand immSrc && immSrc.Value == 0) { return $"{FormatOperand(dest)} = 0; // Initialize to zero"; } return $"{FormatOperand(dest)} = {FormatOperand(src)};"; } break; case InstructionType.Add: // Handle ADD instruction if (instruction.StructuredOperands.Count >= 2) { var dest = instruction.StructuredOperands[0]; var src = instruction.StructuredOperands[1]; // Special case for adding 1 (increment) if (src is ImmediateOperand immSrc && immSrc.Value == 1) { return $"{FormatOperand(dest)}++; // Increment"; } return $"{FormatOperand(dest)} += {FormatOperand(src)};"; } break; case InstructionType.Sub: // Handle SUB instruction if (instruction.StructuredOperands.Count >= 2) { var dest = instruction.StructuredOperands[0]; var src = instruction.StructuredOperands[1]; // Special case for subtracting 1 (decrement) if (src is ImmediateOperand immSrc && immSrc.Value == 1) { return $"{FormatOperand(dest)}--; // Decrement"; } return $"{FormatOperand(dest)} -= {FormatOperand(src)};"; } break; case InstructionType.And: // Handle AND instruction if (instruction.StructuredOperands.Count >= 2) { var dest = instruction.StructuredOperands[0]; var src = instruction.StructuredOperands[1]; return $"{FormatOperand(dest)} &= {FormatOperand(src)};"; } break; case InstructionType.Or: // Handle OR instruction if (instruction.StructuredOperands.Count >= 2) { var dest = instruction.StructuredOperands[0]; var src = instruction.StructuredOperands[1]; return $"{FormatOperand(dest)} |= {FormatOperand(src)};"; } break; case InstructionType.Xor: // Handle XOR instruction if (instruction.StructuredOperands.Count >= 2) { var dest = instruction.StructuredOperands[0]; var src = instruction.StructuredOperands[1]; // We already handled the special case of XOR with self above return $"{FormatOperand(dest)} ^= {FormatOperand(src)};"; } break; case InstructionType.Test: // Handle TEST instruction (no assignment, just sets flags) if (instruction.StructuredOperands.Count >= 2) { var left = instruction.StructuredOperands[0]; var right = instruction.StructuredOperands[1]; // Special case for TEST with self (checking if a register is zero) if (left is RegisterOperand regLeft && right is RegisterOperand regRight && regLeft.Register == regRight.Register) { return $"// Check if {FormatOperand(left)} is zero"; } return $"// Test {FormatOperand(left)} & {FormatOperand(right)}"; } break; case InstructionType.Cmp: // Handle CMP instruction (no assignment, just sets flags) if (instruction.StructuredOperands.Count >= 2) { var left = instruction.StructuredOperands[0]; var right = instruction.StructuredOperands[1]; // For CMP, we'll return a comment that explains what's being compared // This will help with understanding the following conditional jumps return $"// Compare {FormatOperand(left)} with {FormatOperand(right)}"; } break; case InstructionType.Call: // Handle CALL instruction if (instruction.StructuredOperands.Count >= 1) { var target = instruction.StructuredOperands[0]; // For function calls, we'll generate a proper function call expression return $"{FormatOperand(target)}(); // Function call"; } break; case InstructionType.Ret: // Handle RET instruction return "return 0; // Placeholder return value"; case InstructionType.Push: // Handle PUSH instruction if (instruction.StructuredOperands.Count >= 1) { var src = instruction.StructuredOperands[0]; return $"// Push {FormatOperand(src)} onto stack"; } break; case InstructionType.Pop: // Handle POP instruction if (instruction.StructuredOperands.Count >= 1) { var dest = instruction.StructuredOperands[0]; return $"{FormatOperand(dest)} = pop(); // Pop from stack"; } break; case InstructionType.Inc: // Handle INC instruction if (instruction.StructuredOperands.Count >= 1) { var dest = instruction.StructuredOperands[0]; return $"{FormatOperand(dest)}++;"; } break; case InstructionType.Dec: // Handle DEC instruction if (instruction.StructuredOperands.Count >= 1) { var dest = instruction.StructuredOperands[0]; return $"{FormatOperand(dest)}--;"; } break; case InstructionType.Shl: // Handle SHL/SAL instruction (shift left) if (instruction.StructuredOperands.Count >= 2) { var dest = instruction.StructuredOperands[0]; var count = instruction.StructuredOperands[1]; return $"{FormatOperand(dest)} <<= {FormatOperand(count)};"; } break; case InstructionType.Shr: // Handle SHR instruction (shift right logical) if (instruction.StructuredOperands.Count >= 2) { var dest = instruction.StructuredOperands[0]; var count = instruction.StructuredOperands[1]; return $"{FormatOperand(dest)} >>>= {FormatOperand(count)}; // Logical shift right"; } break; case InstructionType.Sar: // Handle SAR instruction (shift right arithmetic) if (instruction.StructuredOperands.Count >= 2) { var dest = instruction.StructuredOperands[0]; var count = instruction.StructuredOperands[1]; return $"{FormatOperand(dest)} >>= {FormatOperand(count)}; // Arithmetic shift right"; } break; default: // For other instructions, just add a comment return $"// {instruction}"; } return string.Empty; } /// /// Formats an operand for display in pseudocode /// /// The operand to format /// A string representation of the operand private string FormatOperand(Operand operand) { if (operand is RegisterOperand regOp) { // Format register operand return RegisterMapper.GetRegisterName(regOp.Register, 32); } else if (operand is ImmediateOperand immOp) { // Format immediate operand return $"0x{immOp.Value:X}"; } else if (operand is DisplacementMemoryOperand dispOp) { // Format displacement memory operand string baseReg = RegisterMapper.GetRegisterName(dispOp.BaseRegister, 32); return $"*({baseReg} + 0x{dispOp.Displacement:X})"; } else if (operand is BaseRegisterMemoryOperand baseOp) { // Format base register memory operand string baseReg = RegisterMapper.GetRegisterName(baseOp.BaseRegister, 32); return $"*({baseReg})"; } // Default formatting return operand.ToString(); } /// /// Checks if an instruction is part of the function prologue or epilogue /// /// The instruction to check /// True if the instruction is part of the prologue or epilogue, false otherwise private bool IsPrologueOrEpilogueInstruction(Instruction instruction) { // Check for common prologue/epilogue instructions if (instruction.Type == InstructionType.Push && instruction.StructuredOperands.Count > 0 && instruction.StructuredOperands[0] is RegisterOperand reg && reg.Register == RegisterIndex.Bp) { return true; // push ebp } if (instruction.Type == InstructionType.Mov && instruction.StructuredOperands.Count > 1 && instruction.StructuredOperands[0] is RegisterOperand destReg && instruction.StructuredOperands[1] is RegisterOperand srcReg && destReg.Register == RegisterIndex.Bp && srcReg.Register == RegisterIndex.Sp) { return true; // mov ebp, esp } if (instruction.Type == InstructionType.Pop && instruction.StructuredOperands.Count > 0 && instruction.StructuredOperands[0] is RegisterOperand popReg && popReg.Register == RegisterIndex.Bp) { return true; // pop ebp } if (instruction.Type == InstructionType.Ret) { return true; // ret } return false; } /// /// Checks if the given instruction type is a conditional jump /// /// The instruction type /// True if the instruction is a conditional jump, false otherwise private bool IsConditionalJump(InstructionType type) { // Check for common conditional jumps return type == InstructionType.Jz || type == InstructionType.Jnz || type == InstructionType.Jg || type == InstructionType.Jge || type == InstructionType.Jl || type == InstructionType.Jle || type == InstructionType.Ja || type == InstructionType.Jae || type == InstructionType.Jb || type == InstructionType.Jbe || type == InstructionType.Jo || type == InstructionType.Jno || type == InstructionType.Js || type == InstructionType.Jns; } /// /// Gets the target address of a jump instruction /// /// The jump instruction /// The target address of the jump private ulong GetJumpTargetAddress(Instruction instruction) { // Jump instructions have the target address as their first operand if (instruction.StructuredOperands.Count > 0) { return instruction.StructuredOperands[0].GetValue(); } // If we can't determine the target address, return 0 return 0; } /// /// Generates a condition expression based on a conditional jump instruction /// /// The conditional jump instruction /// The left operand of the comparison, if available /// The right operand of the comparison, if available /// A string representing the condition expression private string GenerateConditionFromJump(Instruction instruction, string? left = null, string? right = null) { // If we don't have comparison operands, use a generic condition if (left == null || right == null) { switch (instruction.Type) { case InstructionType.Jz: return "zero flag is set"; case InstructionType.Jnz: return "zero flag is not set"; default: return "condition"; } } // If we have comparison operands, generate a more specific condition switch (instruction.Type) { case InstructionType.Jz: return $"{left} == 0"; case InstructionType.Jnz: return $"{left} != 0"; default: return $"{left} ? {right}"; } } }