Training courses

Kernel and Embedded Linux

Bootlin training courses

Embedded Linux, kernel,
Yocto Project, Buildroot, real-time,
graphics, boot time, debugging...

Bootlin logo

Elixir Cross Referencer

//===-- Latency.cpp ---------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "Latency.h"

#include "Assembler.h"
#include "BenchmarkRunner.h"
#include "MCInstrDescView.h"
#include "PerfHelper.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/Support/FormatVariadic.h"

namespace exegesis {

static bool hasUnknownOperand(const llvm::MCOperandInfo &OpInfo) {
  return OpInfo.OperandType == llvm::MCOI::OPERAND_UNKNOWN;
}

// FIXME: Handle memory, see PR36905.
static bool hasMemoryOperand(const llvm::MCOperandInfo &OpInfo) {
  return OpInfo.OperandType == llvm::MCOI::OPERAND_MEMORY;
}

LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;

llvm::Error LatencyBenchmarkRunner::isInfeasible(
    const llvm::MCInstrDesc &MCInstrDesc) const {
  if (llvm::any_of(MCInstrDesc.operands(), hasUnknownOperand))
    return llvm::make_error<BenchmarkFailure>(
        "Infeasible : has unknown operands");
  if (llvm::any_of(MCInstrDesc.operands(), hasMemoryOperand))
    return llvm::make_error<BenchmarkFailure>(
        "Infeasible : has memory operands");
  return llvm::Error::success();
}

llvm::Expected<SnippetPrototype>
LatencyBenchmarkRunner::generateTwoInstructionPrototype(
    const Instruction &Instr) const {
  std::vector<unsigned> Opcodes;
  Opcodes.resize(State.getInstrInfo().getNumOpcodes());
  std::iota(Opcodes.begin(), Opcodes.end(), 0U);
  std::shuffle(Opcodes.begin(), Opcodes.end(), randomGenerator());
  for (const unsigned OtherOpcode : Opcodes) {
    if (OtherOpcode == Instr.Description->Opcode)
      continue;
    const auto &OtherInstrDesc = State.getInstrInfo().get(OtherOpcode);
    if (auto E = isInfeasible(OtherInstrDesc)) {
      llvm::consumeError(std::move(E));
      continue;
    }
    const Instruction OtherInstr(OtherInstrDesc, RATC);
    const AliasingConfigurations Forward(Instr, OtherInstr);
    const AliasingConfigurations Back(OtherInstr, Instr);
    if (Forward.empty() || Back.empty())
      continue;
    InstructionInstance ThisII(Instr);
    InstructionInstance OtherII(OtherInstr);
    if (!Forward.hasImplicitAliasing())
      setRandomAliasing(Forward, ThisII, OtherII);
    if (!Back.hasImplicitAliasing())
      setRandomAliasing(Back, OtherII, ThisII);
    SnippetPrototype Prototype;
    Prototype.Explanation =
        llvm::formatv("creating cycle through {0}.",
                      State.getInstrInfo().getName(OtherOpcode));
    Prototype.Snippet.push_back(std::move(ThisII));
    Prototype.Snippet.push_back(std::move(OtherII));
    return std::move(Prototype);
  }
  return llvm::make_error<BenchmarkFailure>(
      "Infeasible : Didn't find any scheme to make the instruction serial");
}

llvm::Expected<SnippetPrototype>
LatencyBenchmarkRunner::generatePrototype(unsigned Opcode) const {
  const auto &InstrDesc = State.getInstrInfo().get(Opcode);
  if (auto E = isInfeasible(InstrDesc))
    return std::move(E);
  const Instruction Instr(InstrDesc, RATC);
  if (auto SelfAliasingPrototype = generateSelfAliasingPrototype(Instr))
    return SelfAliasingPrototype;
  else
    llvm::consumeError(SelfAliasingPrototype.takeError());
  // No self aliasing, trying to create a dependency through another opcode.
  return generateTwoInstructionPrototype(Instr);
}

const char *LatencyBenchmarkRunner::getCounterName() const {
  if (!State.getSubtargetInfo().getSchedModel().hasExtraProcessorInfo())
    llvm::report_fatal_error("sched model is missing extra processor info!");
  const char *CounterName = State.getSubtargetInfo()
                                .getSchedModel()
                                .getExtraProcessorInfo()
                                .PfmCounters.CycleCounter;
  if (!CounterName)
    llvm::report_fatal_error("sched model does not define a cycle counter");
  return CounterName;
}

std::vector<BenchmarkMeasure>
LatencyBenchmarkRunner::runMeasurements(const ExecutableFunction &Function,
                                        const unsigned NumRepetitions) const {
  // Cycle measurements include some overhead from the kernel. Repeat the
  // measure several times and take the minimum value.
  constexpr const int NumMeasurements = 30;
  int64_t MinLatency = std::numeric_limits<int64_t>::max();
  const char *CounterName = getCounterName();
  if (!CounterName)
    llvm::report_fatal_error("could not determine cycle counter name");
  const pfm::PerfEvent CyclesPerfEvent(CounterName);
  if (!CyclesPerfEvent.valid())
    llvm::report_fatal_error("invalid perf event");
  for (size_t I = 0; I < NumMeasurements; ++I) {
    pfm::Counter Counter(CyclesPerfEvent);
    Counter.start();
    Function();
    Counter.stop();
    const int64_t Value = Counter.read();
    if (Value < MinLatency)
      MinLatency = Value;
  }
  return {{"latency", static_cast<double>(MinLatency) / NumRepetitions, ""}};
}

} // namespace exegesis