//===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "FileAnalysis.h"
#include "GraphBuilder.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
// File-local shorthand for the instruction-metadata type and the symbolizer
// type that are referred to throughout this file.
using Instr = llvm::cfi_verify::FileAnalysis::Instr;
using LLVMSymbolizer = llvm::symbolize::LLVMSymbolizer;
namespace llvm {
namespace cfi_verify {
// Backing storage for the "ignore-dwarf" command-line option; the option
// below writes into this variable through cl::location.
bool IgnoreDWARFFlag;
// Registration of the "ignore-dwarf" flag. It defaults to false (cl::init) and
// is consulted in this file before any DWARF data is required or queried.
static cl::opt<bool, true> IgnoreDWARFArg(
"ignore-dwarf",
cl::desc(
"Ignore all DWARF data. This relaxes the requirements for all "
"statically linked libraries to have been compiled with '-g', but "
"will result in false positives for 'CFI unprotected' instructions."),
cl::location(IgnoreDWARFFlag), cl::init(false));
// Returns the spelling of the given CFIProtectionStatus enumerator.
//
// Each enumerator handled in the switch maps to a string identical to its own
// name. A value that matches none of them reaches llvm_unreachable.
StringRef stringCFIProtectionStatus(CFIProtectionStatus Status) {
  const char *Name = nullptr;

  switch (Status) {
  case CFIProtectionStatus::PROTECTED:
    Name = "PROTECTED";
    break;
  case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF:
    Name = "FAIL_NOT_INDIRECT_CF";
    break;
  case CFIProtectionStatus::FAIL_ORPHANS:
    Name = "FAIL_ORPHANS";
    break;
  case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH:
    Name = "FAIL_BAD_CONDITIONAL_BRANCH";
    break;
  case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED:
    Name = "FAIL_REGISTER_CLOBBERED";
    break;
  case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION:
    Name = "FAIL_INVALID_INSTRUCTION";
    break;
  }

  // No case matched: the value is not a known enumerator.
  if (!Name)
    llvm_unreachable("Attempted to stringify an unknown enum value.");
  return Name;
}
// Builds a FileAnalysis for the file named by Filename.
//
// The file is loaded as a binary, checked to be an object file for one of the
// accepted architectures (x86, x86_64, aarch64, aarch64_be), and then the
// disassembly members are initialised and the code sections parsed. The first
// failure encountered is returned as an Error in place of the analysis.
Expected<FileAnalysis> FileAnalysis::Create(StringRef Filename) {
  // Load the file; forward the loader's error if it cannot be opened.
  auto LoadedOrErr = object::createBinary(Filename);
  if (!LoadedOrErr)
    return LoadedOrErr.takeError();

  // The analysis object owns the loaded binary from here on.
  FileAnalysis Analysis(std::move(*LoadedOrErr));

  Analysis.Object = dyn_cast<object::ObjectFile>(Analysis.Binary.getBinary());
  if (!Analysis.Object)
    return make_error<UnsupportedDisassembly>("Failed to cast object");

  // Only these architectures are accepted.
  const auto Arch = Analysis.Object->getArch();
  const bool IsSupportedArch = Arch == Triple::x86 || Arch == Triple::x86_64 ||
                               Arch == Triple::aarch64 ||
                               Arch == Triple::aarch64_be;
  if (!IsSupportedArch)
    return make_error<UnsupportedDisassembly>("Unsupported architecture.");

  Analysis.ObjectTriple = Analysis.Object->makeTriple();
  Analysis.Features = Analysis.Object->getFeatures();

  // Set up the remaining members, then disassemble the code sections.
  if (Error InitErr = Analysis.initialiseDisassemblyMembers())
    return std::move(InitErr);
  if (Error ParseErr = Analysis.parseCodeSections())
    return std::move(ParseErr);

  return std::move(Analysis);
}
// Constructs an analysis that takes ownership of the given binary. Only the
// Binary member is set by this constructor's initialiser list.
FileAnalysis::FileAnalysis(object::OwningBinary<object::Binary> Binary)
: Binary(std::move(Binary)) {}
// Constructs an analysis from an explicit triple and feature set; the Binary
// member is not set by this constructor's initialiser list.
// NOTE(review): presumably intended for use without a backing object file
// (e.g. tests) -- confirm against callers.
FileAnalysis::FileAnalysis(const Triple &ObjectTriple,
const SubtargetFeatures &Features)
: ObjectTriple(ObjectTriple), Features(Features) {}
// Returns the instruction stored immediately before InstrMeta in address
// order, or nullptr when InstrMeta's address is unknown, when it is the first
// stored instruction, or when the preceding entry is not marked Valid.
const Instr *
FileAnalysis::getPrevInstructionSequential(const Instr &InstrMeta) const {
  auto Pos = Instructions.find(InstrMeta.VMAddress);
  if (Pos == Instructions.end() || Pos == Instructions.begin())
    return nullptr;

  // Step back to the neighbouring entry in the address-ordered map.
  --Pos;
  const Instr &Prev = Pos->second;
  return Prev.Valid ? &Prev : nullptr;
}
// Returns the instruction stored immediately after InstrMeta in address
// order, or nullptr when InstrMeta's address is unknown, when it is the last
// stored instruction, or when the following entry is not marked Valid.
const Instr *
FileAnalysis::getNextInstructionSequential(const Instr &InstrMeta) const {
  auto Pos = Instructions.find(InstrMeta.VMAddress);
  if (Pos == Instructions.end())
    return nullptr;

  // Step forward to the neighbouring entry in the address-ordered map.
  ++Pos;
  if (Pos == Instructions.end())
    return nullptr;

  const Instr &Next = Pos->second;
  return Next.Valid ? &Next : nullptr;
}
bool FileAnalysis::usesRegisterOperand(const Instr &InstrMeta) const {
for (const auto &Operand : InstrMeta.Instruction) {
if (Operand.isReg())
return true;
}
return false;
}
// Looks up the instruction metadata recorded at Address. Returns nullptr when
// no instruction is stored at that exact address. The Valid flag of the entry
// is not consulted here.
const Instr *FileAnalysis::getInstruction(uint64_t Address) const {
  auto Found = Instructions.find(Address);
  return Found != Instructions.end() ? &Found->second : nullptr;
}
// Returns the instruction metadata recorded at Address.
//
// The address must be present in Instructions. This is only enforced by the
// assert below, so in builds where asserts are compiled out a missing address
// dereferences the end iterator.
const Instr &FileAnalysis::getInstructionOrDie(uint64_t Address) const {
const auto &InstrKV = Instructions.find(Address);
assert(InstrKV != Instructions.end() && "Address doesn't exist.");
return InstrKV->second;
}
// Returns true when the instruction description for InstrMeta's opcode is
// flagged as a trap.
bool FileAnalysis::isCFITrap(const Instr &InstrMeta) const {
  return MII->get(InstrMeta.Instruction.getOpcode()).isTrap();
}
// Returns true when execution may continue from InstrMeta to the instruction
// that follows it in memory.
//
// Invalid instructions and traps never fall through. An instruction that may
// affect control flow falls through only if it is a conditional branch; every
// other instruction falls through.
bool FileAnalysis::canFallThrough(const Instr &InstrMeta) const {
  if (!InstrMeta.Valid || isCFITrap(InstrMeta))
    return false;

  const auto &Desc = MII->get(InstrMeta.Instruction.getOpcode());
  const bool AffectsControlFlow =
      Desc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo);
  return !AffectsControlFlow || Desc.isConditionalBranch();
}
// Returns the single instruction that is certain to execute after InstrMeta,
// or nullptr when there is no such instruction.
//
// nullptr is returned for invalid instructions, traps, conditional branches,
// control-flow instructions whose target cannot be evaluated, and whenever the
// successor address holds no instruction or an instruction not marked Valid.
const Instr *
FileAnalysis::getDefiniteNextInstruction(const Instr &InstrMeta) const {
  if (!InstrMeta.Valid || isCFITrap(InstrMeta))
    return nullptr;

  const auto &Desc = MII->get(InstrMeta.Instruction.getOpcode());

  uint64_t SuccessorAddress;
  if (!Desc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) {
    // Plain instruction: the successor is the next one in memory.
    SuccessorAddress = InstrMeta.VMAddress + InstrMeta.InstructionSize;
  } else {
    // A conditional branch has two possible successors, so none is definite.
    if (Desc.isConditionalBranch())
      return nullptr;
    // Otherwise the successor is the branch target, if it can be computed.
    if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress,
                             InstrMeta.InstructionSize, SuccessorAddress))
      return nullptr;
  }

  const Instr *Successor = getInstruction(SuccessorAddress);
  return (Successor && Successor->Valid) ? Successor : nullptr;
}
std::set<const Instr *>
FileAnalysis::getDirectControlFlowXRefs(const Instr &InstrMeta) const {
std::set<const Instr *> CFCrossReferences;
const Instr *PrevInstruction = getPrevInstructionSequential(InstrMeta);
if (PrevInstruction && canFallThrough(*PrevInstruction))
CFCrossReferences.insert(PrevInstruction);
const auto &TargetRefsKV = StaticBranchTargetings.find(InstrMeta.VMAddress);
if (TargetRefsKV == StaticBranchTargetings.end())
return CFCrossReferences;
for (uint64_t SourceInstrAddress : TargetRefsKV->second) {
const auto &SourceInstrKV = Instructions.find(SourceInstrAddress);
if (SourceInstrKV == Instructions.end()) {
errs() << "Failed to find source instruction at address "
<< format_hex(SourceInstrAddress, 2)
<< " for the cross-reference to instruction at address "
<< format_hex(InstrMeta.VMAddress, 2) << ".\n";
continue;
}
CFCrossReferences.insert(&SourceInstrKV->second);
}
return CFCrossReferences;
}
// Returns a reference to the set of addresses recorded in
// IndirectInstructions (populated while parsing section contents).
const std::set<uint64_t> &FileAnalysis::getIndirectInstructions() const {
return IndirectInstructions;
}
// Returns a non-owning pointer to the register info held by this analysis;
// null if RegisterInfo has not been set.
const MCRegisterInfo *FileAnalysis::getRegisterInfo() const {
return RegisterInfo.get();
}
// Returns a non-owning pointer to the instruction info held by this analysis;
// null if MII has not been set.
const MCInstrInfo *FileAnalysis::getMCInstrInfo() const { return MII.get(); }
// Returns a non-owning pointer to the instruction analysis held by this
// analysis; null if MIA has not been set.
const MCInstrAnalysis *FileAnalysis::getMCInstrAnalysis() const {
return MIA.get();
}
// Forwards to the symbolizer's symbolizeInlinedCode, passing the object's file
// name and Address, and returns its result unchanged.
// Symbolizer must already be set (asserted); Object is dereferenced without a
// check.
Expected<DIInliningInfo> FileAnalysis::symbolizeInlinedCode(uint64_t Address) {
assert(Symbolizer != nullptr && "Symbolizer is invalid.");
return Symbolizer->symbolizeInlinedCode(Object->getFileName(), Address);
}
// Classifies the CFI protection of the instruction at Graph.BaseAddress.
//
// The checks run in this order and the first failing one decides the result:
//   1. an instruction must be stored at the base address;
//   2. it must be able to affect control flow and use a register operand;
//   3. the graph must contain no orphaned nodes;
//   4. every conditional branch node must have CFIProtection set;
//   5. indirectCFOperandClobber must report no clobber (i.e. return the base
//      address itself).
// PROTECTED is returned only when all of them pass.
CFIProtectionStatus
FileAnalysis::validateCFIProtection(const GraphResult &Graph) const {
  const Instr *IndirectCF = getInstruction(Graph.BaseAddress);
  if (!IndirectCF)
    return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION;

  const auto &Desc = MII->get(IndirectCF->Instruction.getOpcode());
  const bool IsIndirectCF =
      Desc.mayAffectControlFlow(IndirectCF->Instruction, *RegisterInfo) &&
      usesRegisterOperand(*IndirectCF);
  if (!IsIndirectCF)
    return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF;

  if (!Graph.OrphanedNodes.empty())
    return CFIProtectionStatus::FAIL_ORPHANS;

  for (const auto &BranchNode : Graph.ConditionalBranchNodes) {
    if (!BranchNode.CFIProtection)
      return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH;
  }

  const bool Clobbered = indirectCFOperandClobber(Graph) != Graph.BaseAddress;
  return Clobbered ? CFIProtectionStatus::FAIL_REGISTER_CLOBBERED
                   : CFIProtectionStatus::PROTECTED;
}
// Checks whether a register operand of the indirect control-flow instruction
// at Graph.BaseAddress is redefined between one of the graph's conditional
// branches and that instruction.
//
// Returns the address of the first redefining ("clobbering") instruction
// found, or Graph.BaseAddress when none is found. Graph.OrphanedNodes must be
// empty (asserted), and the base address must hold a stored instruction.
uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult &Graph) const {
assert(Graph.OrphanedNodes.empty() && "Orphaned nodes should be empty.");
// Get the set of registers we must check to ensure they're not clobbered.
const Instr &IndirectCF = getInstructionOrDie(Graph.BaseAddress);
DenseSet<unsigned> RegisterNumbers;
for (const auto &Operand : IndirectCF.Instruction) {
if (Operand.isReg())
RegisterNumbers.insert(Operand.getReg());
}
assert(RegisterNumbers.size() && "Zero register operands on indirect CF.");
// Now check all branches to indirect CFs and ensure no clobbering happens.
for (const auto &Branch : Graph.ConditionalBranchNodes) {
// Start from whichever successor of the branch is flagged as being on the
// path to the indirect CF.
uint64_t Node;
if (Branch.IndirectCFIsOnTargetPath)
Node = Branch.Target;
else
Node = Branch.Fallthrough;
// Some architectures (e.g., AArch64) cannot load in an indirect branch, so
// we allow them one load.
// canLoad starts out true only when the indirect CF itself does not load.
bool canLoad = !MII->get(IndirectCF.Instruction.getOpcode()).mayLoad();
// We walk backwards from the indirect CF. It is the last node returned by
// Graph.flattenAddress, so we skip it since we already handled it.
// Each branch is checked against its own copy of the register set.
DenseSet<unsigned> CurRegisterNumbers = RegisterNumbers;
std::vector<uint64_t> Nodes = Graph.flattenAddress(Node);
for (auto I = Nodes.rbegin() + 1, E = Nodes.rend(); I != E; ++I) {
Node = *I;
const Instr &NodeInstr = getInstructionOrDie(Node);
const auto &InstrDesc = MII->get(NodeInstr.Instruction.getOpcode());
for (auto RI = CurRegisterNumbers.begin(), RE = CurRegisterNumbers.end();
RI != RE; ++RI) {
unsigned RegNum = *RI;
if (InstrDesc.hasDefOfPhysReg(NodeInstr.Instruction, RegNum,
*RegisterInfo)) {
// A definition of a tracked register is a clobber unless it is the one
// permitted load.
if (!canLoad || !InstrDesc.mayLoad())
return Node;
canLoad = false;
// Stop tracking the loaded register. The set is modified here and just
// below, so RI must not be used again; the break leaves the loop.
CurRegisterNumbers.erase(RI);
// Add the registers this load reads to those we check for clobbers.
for (unsigned i = InstrDesc.getNumDefs(),
e = InstrDesc.getNumOperands(); i != e; i++) {
const auto Operand = NodeInstr.Instruction.getOperand(i);
if (Operand.isReg())
CurRegisterNumbers.insert(Operand.getReg());
}
break;
}
}
}
}
// No clobber was found on any path.
return Graph.BaseAddress;
}
// Writes InstrMeta's instruction to OS using Printer and the current
// SubtargetInfo. Both members are dereferenced without a null check.
void FileAnalysis::printInstruction(const Instr &InstrMeta,
raw_ostream &OS) const {
Printer->printInst(&InstrMeta.Instruction, OS, "", *SubtargetInfo.get());
}
// Creates the symbolizer and every MC-layer helper this analysis relies on
// (target, register info, asm info, subtarget info, instruction info, context,
// disassembler, instruction analysis and instruction printer) for
// ObjectTriple and Features.
//
// Returns Error::success() when all of them were created. Otherwise returns an
// UnsupportedDisassembly error naming the first component that could not be
// created; members initialised before the failure keep their new values.
//
// MIA and Printer are verified as well, since other members of this class
// dereference them unconditionally.
Error FileAnalysis::initialiseDisassemblyMembers() {
  std::string TripleName = ObjectTriple.getTriple();
  ArchName = "";
  MCPU = "";
  std::string ErrorString;

  Symbolizer.reset(new LLVMSymbolizer());

  ObjectTarget =
      TargetRegistry::lookupTarget(ArchName, ObjectTriple, ErrorString);
  if (!ObjectTarget)
    return make_error<UnsupportedDisassembly>(
        (Twine("Couldn't find target \"") + ObjectTriple.getTriple() +
         "\", failed with error: " + ErrorString)
            .str());

  RegisterInfo.reset(ObjectTarget->createMCRegInfo(TripleName));
  if (!RegisterInfo)
    return make_error<UnsupportedDisassembly>(
        "Failed to initialise RegisterInfo.");

  AsmInfo.reset(ObjectTarget->createMCAsmInfo(*RegisterInfo, TripleName));
  if (!AsmInfo)
    return make_error<UnsupportedDisassembly>("Failed to initialise AsmInfo.");

  SubtargetInfo.reset(ObjectTarget->createMCSubtargetInfo(
      TripleName, MCPU, Features.getString()));
  if (!SubtargetInfo)
    return make_error<UnsupportedDisassembly>(
        "Failed to initialise SubtargetInfo.");

  MII.reset(ObjectTarget->createMCInstrInfo());
  if (!MII)
    return make_error<UnsupportedDisassembly>("Failed to initialise MII.");

  Context.reset(new MCContext(AsmInfo.get(), RegisterInfo.get(), &MOFI));

  Disassembler.reset(
      ObjectTarget->createMCDisassembler(*SubtargetInfo, *Context));
  if (!Disassembler)
    return make_error<UnsupportedDisassembly>(
        "No disassembler available for target");

  // Branch evaluation goes through MIA without a null check, so a missing
  // instruction analysis has to be reported here.
  MIA.reset(ObjectTarget->createMCInstrAnalysis(MII.get()));
  if (!MIA)
    return make_error<UnsupportedDisassembly>("Failed to initialise MIA.");

  // Likewise, printInstruction dereferences Printer unconditionally.
  Printer.reset(ObjectTarget->createMCInstPrinter(
      ObjectTriple, AsmInfo->getAssemblerDialect(), *AsmInfo, *MII,
      *RegisterInfo));
  if (!Printer)
    return make_error<UnsupportedDisassembly>("Failed to initialise Printer.");

  return Error::success();
}
// Disassembles every executable section of Object.
//
// Unless IgnoreDWARFFlag is set, the object must first yield a DWARF context
// in which at least one compile unit has a non-empty line table; otherwise a
// StringError is returned. Sections are then examined through
// object::ELFSectionRef and only those carrying SHF_EXECINSTR are handed to
// parseSectionContents. A failure to read a section's contents is returned as
// a StringError.
Error FileAnalysis::parseCodeSections() {
  if (!IgnoreDWARFFlag) {
    std::unique_ptr<DWARFContext> DebugInfo = DWARFContext::create(*Object);
    if (!DebugInfo)
      return make_error<StringError>("Could not create DWARF information.",
                                     inconvertibleErrorCode());

    // One compile unit with line rows is enough to accept the file.
    bool HasLineRows = false;
    for (auto &CompileUnit : DebugInfo->compile_units()) {
      const auto &Table = DebugInfo->getLineTableForUnit(CompileUnit.get());
      if (!Table || Table->Rows.empty())
        continue;
      HasLineRows = true;
      break;
    }

    if (!HasLineRows)
      return make_error<StringError>(
          "DWARF line information missing. Did you compile with '-g'?",
          inconvertibleErrorCode());
  }

  for (const object::SectionRef &Section : Object->sections()) {
    // Skip anything that is not flagged as executable.
    const auto SectionFlags = object::ELFSectionRef(Section).getFlags();
    if ((SectionFlags & ELF::SHF_EXECINSTR) == 0)
      continue;

    StringRef Contents;
    if (Section.getContents(Contents))
      return make_error<StringError>("Failed to retrieve section contents",
                                     inconvertibleErrorCode());

    // View the section's bytes without copying; the length comes from the
    // section header rather than from the returned contents.
    const uint8_t *FirstByte =
        reinterpret_cast<const uint8_t *>(Contents.data());
    ArrayRef<uint8_t> Bytes(FirstByte, Section.getSize());
    parseSectionContents(Bytes, Section.getAddress());
  }

  return Error::success();
}
// Disassembles SectionBytes linearly and records the result.
//
// SectionBytes     the raw bytes of one section.
// SectionAddress   the address of the first byte of SectionBytes.
//
// Every decoded position, valid or not, is stored through addInstruction. For
// valid instructions that may affect control flow:
//   * if the branch target can be evaluated, the instruction's address is
//     appended to StaticBranchTargetings[Target];
//   * otherwise, if the instruction uses a register operand and is not a
//     return, its address is added to IndirectInstructions. Unless
//     IgnoreDWARFFlag is set, this only happens when the symbolizer returns
//     line info whose file name is not "<invalid>".
//
// Symbolizer must be initialised (asserted).
void FileAnalysis::parseSectionContents(ArrayRef<uint8_t> SectionBytes,
                                        uint64_t SectionAddress) {
  assert(Symbolizer && "Symbolizer is uninitialised.");
  MCInst Instruction;
  Instr InstrMeta;
  uint64_t InstructionSize = 0;

  for (uint64_t Byte = 0; Byte < SectionBytes.size();) {
    bool ValidInstruction =
        Disassembler->getInstruction(Instruction, InstructionSize,
                                     SectionBytes.drop_front(Byte), 0, nulls(),
                                     outs()) == MCDisassembler::Success;

    // A disassembler may report a size of zero when it cannot decode anything
    // (for example, when fewer bytes remain than its minimum instruction
    // width). Without forward progress the same address would be recorded
    // twice, which addInstruction treats as fatal. Skip a single byte instead.
    if (InstructionSize == 0)
      InstructionSize = 1;

    const uint64_t VMAddress = SectionAddress + Byte;
    Byte += InstructionSize;

    InstrMeta.Instruction = Instruction;
    InstrMeta.VMAddress = VMAddress;
    InstrMeta.InstructionSize = InstructionSize;
    InstrMeta.Valid = ValidInstruction;

    addInstruction(InstrMeta);

    if (!ValidInstruction)
      continue;

    // Skip additional parsing for instructions that do not affect the control
    // flow.
    const auto &InstrDesc = MII->get(Instruction.getOpcode());
    if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo))
      continue;

    uint64_t Target;
    if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target)) {
      // If the target can be evaluated, it's not indirect.
      StaticBranchTargetings[Target].push_back(VMAddress);
      continue;
    }

    if (!usesRegisterOperand(InstrMeta))
      continue;

    if (InstrDesc.isReturn())
      continue;

    // Check if this instruction exists in the range of the DWARF metadata.
    if (!IgnoreDWARFFlag) {
      auto LineInfo =
          Symbolizer->symbolizeCode(Object->getFileName(), VMAddress);
      if (!LineInfo) {
        handleAllErrors(LineInfo.takeError(), [](const ErrorInfoBase &E) {
          errs() << "Symbolizer failed to get line: " << E.message() << "\n";
        });
        continue;
      }

      if (LineInfo->FileName == "<invalid>")
        continue;
    }

    IndirectInstructions.insert(VMAddress);
  }
}
// Stores a copy of Instruction in Instructions, keyed by its VMAddress.
//
// If an entry already exists at that address, a message is written to errs()
// and the process exits with EXIT_FAILURE.
void FileAnalysis::addInstruction(const Instr &Instruction) {
  const bool Inserted =
      Instructions.insert(std::make_pair(Instruction.VMAddress, Instruction))
          .second;
  if (Inserted)
    return;

  errs() << "Failed to add instruction at address "
         << format_hex(Instruction.VMAddress, 2)
         << ": Instruction at this address already exists.\n";
  exit(EXIT_FAILURE);
}
// Stores the explanatory message in the Text member; log() appends it to a
// fixed prefix.
UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text) : Text(Text) {}
// Out-of-line definition of the class's ID member.
// NOTE(review): presumably the address of this object is what identifies the
// error type in LLVM's Error type-checking machinery -- confirm against the
// class declaration.
char UnsupportedDisassembly::ID;
// Writes the error description to OS: a fixed prefix followed by the stored
// Text. No trailing newline is written.
void UnsupportedDisassembly::log(raw_ostream &OS) const {
OS << "Could not initialise disassembler: " << Text;
}
// Maps this error onto a std::error_code.
//
// A default-constructed std::error_code is the "no error" value, so returning
// it would make any conversion of this error look like success. The condition
// is reported as std::errc::not_supported instead, which evaluates as a
// failure.
std::error_code UnsupportedDisassembly::convertToErrorCode() const {
  return std::make_error_code(std::errc::not_supported);
}
} // namespace cfi_verify
} // namespace llvm