retdec
decoder.h
Go to the documentation of this file.
1 
7 #ifndef RETDEC_BIN2LLVMIR_OPTIMIZATIONS_DECODER_DECODER_H
8 #define RETDEC_BIN2LLVMIR_OPTIMIZATIONS_DECODER_DECODER_H
9 
10 #include <map>
11 #include <optional>
12 #include <queue>
13 #include <sstream>
14 
15 #include <llvm/IR/CFG.h>
16 #include <llvm/IR/Function.h>
17 #include <llvm/IR/InstIterator.h>
18 #include <llvm/IR/Module.h>
19 #include <llvm/Pass.h>
20 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
21 
22 #include "retdec/common/address.h"
37 
38 namespace retdec {
39 namespace bin2llvmir {
40 
41 class Decoder : public llvm::ModulePass
42 {
43  public:
44  static char ID;
45  Decoder();
46  ~Decoder();
47  virtual bool runOnModule(llvm::Module& m) override;
48  bool runOnModuleCustom(
49  llvm::Module& m,
50  Config* c,
51  FileImage* o,
52  DebugFormat* d,
53  NameContainer* n,
54  Abi* a);
55 
56  private:
57  using ByteData = typename std::pair<const std::uint8_t*, std::size_t>;
58 
59  private:
60  bool runCatcher();
61  bool run();
62 
63  // Initializations.
64  //
65  private:
66  void initTranslator();
68  void initEnvironment();
72  void initRanges();
75  void initJumpTargets();
76  void initJumpTargetsConfig();
81  void initJumpTargetsDebug();
83  void initConfigFunctions();
84  void initStaticCode();
85  void initVtables();
86 
87  private:
88  void decode();
89  bool getJumpTarget(JumpTarget& jt);
90  void decodeJumpTarget(const JumpTarget& jt);
91  std::size_t decodeJumpTargetDryRun(
92  const JumpTarget& jt,
93  ByteData bytes,
94  bool strict = false);
95  cs_mode determineMode(cs_insn* insn, common::Address& target);
97  translate(
98  ByteData& bytes,
99  common::Address& addr,
100  llvm::IRBuilder<>& irb);
101 
103  common::Address addr,
105  std::size_t& rangeSize);
107  common::Address addr,
108  llvm::CallInst* branchCall,
109  llvm::Value* val);
110  bool getJumpTargetSwitch(
111  common::Address addr,
112  llvm::CallInst* branchCall,
113  llvm::Value* val,
114  SymbolicTree& st);
116  common::Address addr,
119  common::Address& addr,
121  ByteData& bytes,
122  llvm::IRBuilder<>& irb);
124  common::Address& addr,
126  ByteData& bytes,
127  llvm::IRBuilder<>& irb);
128 
129  void resolvePseudoCalls();
130  void finalizePseudoCalls();
131 
132  // Basic block related methods.
133  //
134  private:
135  common::Address getBasicBlockAddress(llvm::BasicBlock* b);
136  common::Address getBasicBlockEndAddress(llvm::BasicBlock* b);
138  llvm::BasicBlock* getBasicBlockAtAddress(common::Address a);
139  llvm::BasicBlock* getBasicBlockBeforeAddress(common::Address a);
140  llvm::BasicBlock* getBasicBlockAfterAddress(common::Address a);
141  llvm::BasicBlock* getBasicBlockContainingAddress(common::Address a);
142  llvm::BasicBlock* createBasicBlock(
143  common::Address a,
144  llvm::Function* f,
145  llvm::BasicBlock* insertAfter = nullptr);
146  void addBasicBlock(common::Address a, llvm::BasicBlock* b);
147 
148  std::map<common::Address, llvm::BasicBlock*> _addr2bb;
149  std::map<llvm::BasicBlock*, common::Address> _bb2addr;
150 
151  // Function related methods.
152  //
153  private:
154  common::Address getFunctionAddress(llvm::Function* f);
155  common::Address getFunctionEndAddress(llvm::Function* f);
157  llvm::Function* getFunctionAtAddress(common::Address a);
158  llvm::Function* getFunctionBeforeAddress(common::Address a);
159  llvm::Function* getFunctionAfterAddress(common::Address a);
161  llvm::Function* createFunction(
162  common::Address a,
163  bool declaration = false);
164  void addFunction(common::Address a, llvm::Function* f);
165  void addFunctionSize(llvm::Function* f, std::optional<std::size_t> sz);
166 
167  std::map<common::Address, llvm::Function*> _addr2fnc;
168  std::map<llvm::Function*, common::Address> _fnc2addr;
169  // Function sizes from debug info/symbol table/config/etc.
170  // Used to prevent function splitting.
171  //
172  // TODO: Potential overlaps are not handled.
173  // E.g. ack.arm.gnuarmgcc-4.4.1.O0.g.elf:
174  // __floatundidf @ 0x1645c : size = 128
175  // __floatdidf @ 0x16470 : size = 108
176  // It looks like there is one function in another.
177  //
178  std::map<llvm::Function*, std::size_t> _fnc2sz;
179 
180  // Pattern recognition methods.
181  //
182  private:
183  bool patternsRecognize();
186 
187  // x86 specific.
188  //
189  private:
190  std::size_t decodeJumpTargetDryRun_x86(
191  const JumpTarget& jt,
192  ByteData bytes,
193  bool strict = false);
194 
195  // ARM specific.
196  //
197  private:
198  std::size_t decodeJumpTargetDryRun_arm(
199  const JumpTarget& jt,
200  ByteData bytes,
201  bool strict = false);
202  std::size_t decodeJumpTargetDryRun_arm(
203  const JumpTarget& jt,
204  ByteData bytes,
205  cs_mode mode,
206  std::size_t &decodedSz,
207  bool strict = false);
208  void patternsPseudoCall_arm(llvm::CallInst*& call, AsmInstruction& pAi);
209  cs_mode determineMode_arm(cs_insn* insn, common::Address& target);
210 
211  // ARM64 specific.
212  //
213  private:
214  std::size_t decodeJumpTargetDryRun_arm64(
215  const JumpTarget& jt,
216  ByteData bytes,
217  bool strict = false);
218  void patternsPseudoCall_arm64(llvm::CallInst*& call, AsmInstruction& pAi);
219 
220  // MIPS specific.
221  //
222  private:
223  bool disasm_mips(
224  csh ce,
225  cs_mode m,
226  ByteData& bytes,
227  uint64_t& a,
228  cs_insn* i);
229  std::size_t decodeJumpTargetDryRun_mips(
230  const JumpTarget& jt,
231  ByteData bytes,
232  bool strict = false);
233  void initializeGpReg_mips();
234 
235  // PowerPC specific.
236  //
237  private:
238  std::size_t decodeJumpTargetDryRun_ppc(
239  const JumpTarget& jt,
240  ByteData bytes,
241  bool strict = false);
242 
243  // IR modifications.
244  //
245  private:
246  llvm::CallInst* transformToCall(
247  llvm::CallInst* pseudo,
248  llvm::Function* callee);
249  llvm::CallInst* transformToCondCall(
250  llvm::CallInst* pseudo,
251  llvm::Value* cond,
252  llvm::Function* callee,
253  llvm::BasicBlock* falseBb);
254  llvm::ReturnInst* transformToReturn(llvm::CallInst* pseudo);
255  llvm::BranchInst* transformToBranch(
256  llvm::CallInst* pseudo,
257  llvm::BasicBlock* branchee);
258  llvm::BranchInst* transformToCondBranch(
259  llvm::CallInst* pseudo,
260  llvm::Value* cond,
261  llvm::BasicBlock* trueBb,
262  llvm::BasicBlock* falseBb);
263  llvm::SwitchInst* transformToSwitch(
264  llvm::CallInst* pseudo,
265  llvm::Value* val,
266  llvm::BasicBlock* defaultBb,
267  const std::vector<llvm::BasicBlock*>& cases);
268 
269  llvm::GlobalVariable* getCallReturnObject();
270 
272  common::Address addr,
273  llvm::Function*& tFnc,
274  llvm::BasicBlock*& tBb);
276  common::Address addr,
277  llvm::BasicBlock*& tBb,
278  llvm::Function*& tFnc,
279  llvm::Instruction* from);
280 
281  bool canSplitFunctionOn(llvm::BasicBlock* bb);
282  bool canSplitFunctionOn(
283  common::Address addr,
284  llvm::BasicBlock* bb,
285  std::set<llvm::BasicBlock*>& newFncStarts);
286  llvm::Function* splitFunctionOn(common::Address addr);
287  llvm::Function* splitFunctionOn(common::Address addr, llvm::BasicBlock* bb);
288 
289  // Data.
290  //
291  private:
292  llvm::Module* _module = nullptr;
293  Config* _config = nullptr;
294  FileImage* _image = nullptr;
295  DebugFormat* _debug = nullptr;
296  NameContainer* _names = nullptr;
298  Abi* _abi = nullptr;
299 
300  std::unique_ptr<capstone2llvmir::Capstone2LlvmIrTranslator> _c2l;
301  cs_insn* _dryCsInsn = nullptr;
302 
303  llvm::IRBuilder<>* _irb;
304 
307 
309  std::set<std::string> _externs;
310  std::set<common::Address> _imports;
311  std::set<common::Address> _exports;
312  std::set<common::Address> _symbols;
313  std::map<common::Address, const common::Function*> _debugFncs;
314  std::set<common::Address> _staticFncs;
315  std::set<common::Address> _vtableFncs;
316  std::set<llvm::Function*> _terminatingFncs;
317  llvm::Function* _entryPointFunction = nullptr;
329  std::map<common::Address, std::set<llvm::SwitchInst*>> _switchTableStarts;
330 
331  // We create helper BBs (without name and address) to handle MIPS
332  // likely branches. For convenience, we map them to real BBs they will
333  // eventually jump to.
334  std::map<llvm::BasicBlock*, llvm::BasicBlock*> _likelyBb2Target;
335 
336  // TODO: remove, solve better.
337  bool _switchGenerated = false;
338 
339  bool _somethingDecoded = false;
340 };
341 
342 } // namespace bin2llvmir
343 } // namespace retdec
344 
345 #endif
ABI information.
Mapping of LLVM instructions to underlying ASM instructions.
Config DB provider for bin2llvmir.
Debug format provider for bin2llvmir.
Common public interface for translators converting bytes to LLVM IR.
Definition: abi.h:27
Definition: asm_instruction.h:34
Definition: config.h:24
Definition: debugformat.h:20
Definition: decoder.h:42
std::map< llvm::BasicBlock *, common::Address > _bb2addr
Definition: decoder.h:149
std::set< common::Address > _vtableFncs
Definition: decoder.h:315
void initEnvironmentPseudoFunctions()
Definition: decoder_init.cpp:128
llvm::IRBuilder * _irb
Definition: decoder.h:303
llvm::BranchInst * transformToCondBranch(llvm::CallInst *pseudo, llvm::Value *cond, llvm::BasicBlock *trueBb, llvm::BasicBlock *falseBb)
Definition: ir_modifications.cpp:81
void getOrCreateCallTarget(common::Address addr, llvm::Function *&tFnc, llvm::BasicBlock *&tBb)
Definition: ir_modifications.cpp:186
typename std::pair< const std::uint8_t *, std::size_t > ByteData
Definition: decoder.h:57
static char ID
Definition: decoder.h:44
llvm::Function * getFunctionContainingAddress(common::Address a)
Definition: functions.cpp:95
void initJumpTargetsExterns()
Definition: decoder_init.cpp:626
void initJumpTargetsDebug()
Definition: decoder_init.cpp:887
void addFunction(common::Address a, llvm::Function *f)
Definition: functions.cpp:154
bool patternTerminatingCalls()
Definition: patterns.cpp:31
RangesToDecode _ranges
Definition: decoder.h:305
std::set< llvm::Function * > _terminatingFncs
Definition: decoder.h:316
std::map< common::Address, llvm::Function * > _addr2fnc
Definition: decoder.h:167
std::size_t decodeJumpTargetDryRun_arm(const JumpTarget &jt, ByteData bytes, bool strict=false)
Definition: arm.cpp:45
llvm::BasicBlock * getBasicBlockAfterAddress(common::Address a)
Definition: bbs.cpp:96
void initJumpTargetsExports()
Definition: decoder_init.cpp:787
void initializeGpReg_mips()
Definition: mips.cpp:148
void decode()
Definition: decoder.cpp:144
capstone2llvmir::Capstone2LlvmIrTranslator::TranslationResultOne translate(ByteData &bytes, common::Address &addr, llvm::IRBuilder<> &irb)
Definition: decoder.cpp:338
std::size_t decodeJumpTargetDryRun_arm64(const JumpTarget &jt, ByteData bytes, bool strict=false)
Definition: arm64.cpp:47
void initDryRunCsInstruction()
Definition: decoder_init.cpp:93
JumpTargets _jumpTargets
Definition: decoder.h:306
cs_mode determineMode_arm(cs_insn *insn, common::Address &target)
Definition: arm.cpp:216
llvm::BasicBlock * getBasicBlockBeforeAddress(common::Address a)
Definition: bbs.cpp:65
bool run()
Definition: decoder.cpp:98
bool getJumpTargetSwitch(common::Address addr, llvm::CallInst *branchCall, llvm::Value *val, SymbolicTree &st)
Definition: decoder.cpp:950
cs_insn * _dryCsInsn
Definition: decoder.h:301
bool _somethingDecoded
Definition: decoder.h:339
llvm::CallInst * transformToCondCall(llvm::CallInst *pseudo, llvm::Value *cond, llvm::Function *callee, llvm::BasicBlock *falseBb)
Definition: ir_modifications.cpp:32
common::Address getBasicBlockAddress(llvm::BasicBlock *b)
Definition: bbs.cpp:17
std::set< std::string > _externs
Name of all extern functions gathered from object files.
Definition: decoder.h:309
void patternsPseudoCall_arm64(llvm::CallInst *&call, AsmInstruction &pAi)
std::size_t decodeJumpTargetDryRun(const JumpTarget &jt, ByteData bytes, bool strict=false)
Definition: decoder.cpp:367
std::set< common::Address > _imports
Definition: decoder.h:310
llvm::Function * splitFunctionOn(common::Address addr)
Definition: ir_modifications.cpp:474
common::Address getBasicBlockAddressAfter(common::Address a)
Definition: bbs.cpp:47
std::set< common::Address > _staticFncs
Definition: decoder.h:314
llvm::Function * createFunction(common::Address a, bool declaration=false)
Definition: functions.cpp:109
std::set< common::Address > _symbols
Definition: decoder.h:312
Abi * _abi
Definition: decoder.h:298
void initJumpTargetsSymbols()
Definition: decoder_init.cpp:841
llvm::Function * getFunctionAfterAddress(common::Address a)
Definition: functions.cpp:85
llvm::GlobalVariable * getCallReturnObject()
Definition: ir_modifications.cpp:149
bool patternsRecognize()
Definition: patterns.cpp:18
bool canSplitFunctionOn(llvm::BasicBlock *bb)
Definition: ir_modifications.cpp:317
void handleDelaySlotTypical(common::Address &addr, capstone2llvmir::Capstone2LlvmIrTranslator::TranslationResultOne &res, ByteData &bytes, llvm::IRBuilder<> &irb)
Definition: decoder.cpp:1332
std::size_t decodeJumpTargetDryRun_mips(const JumpTarget &jt, ByteData bytes, bool strict=false)
Definition: mips.cpp:68
void initJumpTargets()
Definition: decoder_init.cpp:544
llvm::SwitchInst * transformToSwitch(llvm::CallInst *pseudo, llvm::Value *val, llvm::BasicBlock *defaultBb, const std::vector< llvm::BasicBlock * > &cases)
Definition: ir_modifications.cpp:95
bool runOnModuleCustom(llvm::Module &m, Config *c, FileImage *o, DebugFormat *d, NameContainer *n, Abi *a)
Definition: decoder.cpp:61
void initEnvironmentAsm2LlvmMapping()
Definition: decoder_init.cpp:117
std::set< common::Address > _exports
Definition: decoder.h:311
cs_mode determineMode(cs_insn *insn, common::Address &target)
Definition: decoder.cpp:404
std::map< llvm::Function *, common::Address > _fnc2addr
Definition: decoder.h:168
llvm::Function * _entryPointFunction
Definition: decoder.h:317
bool patternStaticallyLinked()
Definition: patterns.cpp:309
virtual bool runOnModule(llvm::Module &m) override
Definition: decoder.cpp:49
common::Address getBasicBlockEndAddress(llvm::BasicBlock *b)
Definition: bbs.cpp:33
void decodeJumpTarget(const JumpTarget &jt)
Definition: decoder.cpp:181
Llvm2CapstoneInsnMap * _llvm2capstone
Definition: decoder.h:297
llvm::BasicBlock * getBasicBlockAtAddress(common::Address a)
Definition: bbs.cpp:56
void initConfigFunctions()
Definition: decoder_init.cpp:1032
void initEnvironmentRegisters()
Definition: decoder_init.cpp:162
NameContainer * _names
Definition: decoder.h:296
void initRanges()
Definition: decoder_init.cpp:184
std::map< common::Address, llvm::BasicBlock * > _addr2bb
Definition: decoder.h:148
void addFunctionSize(llvm::Function *f, std::optional< std::size_t > sz)
Definition: functions.cpp:164
std::map< llvm::BasicBlock *, llvm::BasicBlock * > _likelyBb2Target
Definition: decoder.h:334
llvm::BasicBlock * getBasicBlockContainingAddress(common::Address a)
Definition: bbs.cpp:106
std::map< common::Address, const common::Function * > _debugFncs
Definition: decoder.h:313
std::unique_ptr< capstone2llvmir::Capstone2LlvmIrTranslator > _c2l
Definition: decoder.h:300
std::map< llvm::Function *, std::size_t > _fnc2sz
Definition: decoder.h:178
bool _switchGenerated
Definition: decoder.h:337
void resolvePseudoCalls()
Definition: decoder.cpp:1427
void patternsPseudoCall_arm(llvm::CallInst *&call, AsmInstruction &pAi)
Definition: arm.cpp:171
bool instructionBreaksBasicBlock(common::Address addr, capstone2llvmir::Capstone2LlvmIrTranslator::TranslationResultOne &tr)
Definition: decoder.cpp:416
bool disasm_mips(csh ce, cs_mode m, ByteData &bytes, uint64_t &a, cs_insn *i)
Definition: mips.cpp:49
void initTranslator()
Definition: decoder_init.cpp:26
void initEnvironment()
Definition: decoder_init.cpp:102
void finalizePseudoCalls()
Definition: decoder.cpp:1484
llvm::Module * _module
Definition: decoder.h:292
void initAllowedRangesWithConfig()
Definition: decoder_init.cpp:338
void initAllowedRangesWithSegments()
Definition: decoder_init.cpp:217
void initJumpTargetsEntryPoint()
Definition: decoder_init.cpp:596
bool getJumpTargetsFromInstruction(common::Address addr, capstone2llvmir::Capstone2LlvmIrTranslator::TranslationResultOne &tr, std::size_t &rangeSize)
Definition: decoder.cpp:470
void handleDelaySlotLikely(common::Address &addr, capstone2llvmir::Capstone2LlvmIrTranslator::TranslationResultOne &res, ByteData &bytes, llvm::IRBuilder<> &irb)
Definition: decoder.cpp:1377
Decoder()
Definition: decoder.cpp:35
bool runCatcher()
Definition: decoder.cpp:79
llvm::Function * getFunctionBeforeAddress(common::Address a)
Definition: functions.cpp:57
void getOrCreateBranchTarget(common::Address addr, llvm::BasicBlock *&tBb, llvm::Function *&tFnc, llvm::Instruction *from)
Definition: ir_modifications.cpp:237
void initStaticCode()
Definition: decoder_init.cpp:934
bool getJumpTarget(JumpTarget &jt)
Definition: decoder.cpp:161
FileImage * _image
Definition: decoder.h:294
std::size_t decodeJumpTargetDryRun_ppc(const JumpTarget &jt, ByteData bytes, bool strict=false)
Definition: powerpc.cpp:18
common::Address getFunctionAddressAfter(common::Address a)
Definition: functions.cpp:39
void initJumpTargetsImports()
Definition: decoder_init.cpp:667
void initVtables()
Definition: decoder_init.cpp:992
std::map< common::Address, std::set< llvm::SwitchInst * > > _switchTableStarts
Definition: decoder.h:329
llvm::ReturnInst * transformToReturn(llvm::CallInst *pseudo)
Definition: ir_modifications.cpp:56
llvm::CallInst * transformToCall(llvm::CallInst *pseudo, llvm::Function *callee)
Definition: ir_modifications.cpp:14
llvm::BranchInst * transformToBranch(llvm::CallInst *pseudo, llvm::BasicBlock *branchee)
Definition: ir_modifications.cpp:69
void initJumpTargetsConfig()
Definition: decoder_init.cpp:560
llvm::Function * getFunctionAtAddress(common::Address a)
Definition: functions.cpp:48
common::Address getFunctionEndAddress(llvm::Function *f)
Definition: functions.cpp:28
std::size_t decodeJumpTargetDryRun_x86(const JumpTarget &jt, ByteData bytes, bool strict=false)
Definition: x86.cpp:18
llvm::BasicBlock * createBasicBlock(common::Address a, llvm::Function *f, llvm::BasicBlock *insertAfter=nullptr)
Definition: bbs.cpp:137
void addBasicBlock(common::Address a, llvm::BasicBlock *b)
Definition: bbs.cpp:162
Config * _config
Definition: decoder.h:293
DebugFormat * _debug
Definition: decoder.h:295
common::Address getFunctionAddress(llvm::Function *f)
Definition: functions.cpp:18
~Decoder()
Definition: decoder.cpp:41
Definition: fileimage.h:27
Definition: jump_targets.h:27
Definition: jump_targets.h:97
Definition: names.h:154
Definition: decoder_ranges.h:19
Definition: symbolic_tree.h:43
Definition: address.h:21
Address, address pair and other derived class representation.
Decode input binary into LLVM IR.
Representation of ranges to decode.
File image provider for bin2llvmir.
Modify both LLVM IR and config.
Jump targets representation.
Database of objects' names in binary.
The front-end part of the decompiler.
typename std::map< llvm::StoreInst *, cs_insn * > Llvm2CapstoneInsnMap
Definition: asm_instruction.h:25
Definition: archive_wrapper.h:19
Static code finder library.
Construction of symbolic tree from the given node.