//===- Replacement.cpp - Framework for clang refactoring tools ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implements classes to support/store refactorings.
//
//===----------------------------------------------------------------------===//
#include "clang/Tooling/Core/Replacement.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticIDs.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
#include "clang/Rewrite/Core/RewriteBuffer.h"
#include "clang/Rewrite/Core/Rewriter.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <limits>
#include <map>
#include <string>
#include <utility>
#include <vector>
using namespace clang;
using namespace tooling;
static const char * const InvalidLocation = "";
Replacement::Replacement() : FilePath(InvalidLocation) {}
Replacement::Replacement(StringRef FilePath, unsigned Offset, unsigned Length,
StringRef ReplacementText)
: FilePath(std::string(FilePath)), ReplacementRange(Offset, Length),
ReplacementText(std::string(ReplacementText)) {}
Replacement::Replacement(const SourceManager &Sources, SourceLocation Start,
unsigned Length, StringRef ReplacementText) {
setFromSourceLocation(Sources, Start, Length, ReplacementText);
}
Replacement::Replacement(const SourceManager &Sources,
const CharSourceRange &Range,
StringRef ReplacementText,
const LangOptions &LangOpts) {
setFromSourceRange(Sources, Range, ReplacementText, LangOpts);
}
bool Replacement::isApplicable() const {
return FilePath != InvalidLocation;
}
bool Replacement::apply(Rewriter &Rewrite) const {
SourceManager &SM = Rewrite.getSourceMgr();
auto Entry = SM.getFileManager().getFile(FilePath);
if (!Entry)
return false;
FileID ID = SM.getOrCreateFileID(*Entry, SrcMgr::C_User);
const SourceLocation Start =
SM.getLocForStartOfFile(ID).
getLocWithOffset(ReplacementRange.getOffset());
// ReplaceText returns false on success.
// ReplaceText only fails if the source location is not a file location, in
// which case we already returned false earlier.
bool RewriteSucceeded = !Rewrite.ReplaceText(
Start, ReplacementRange.getLength(), ReplacementText);
assert(RewriteSucceeded);
return RewriteSucceeded;
}
std::string Replacement::toString() const {
std::string Result;
llvm::raw_string_ostream Stream(Result);
Stream << FilePath << ": " << ReplacementRange.getOffset() << ":+"
<< ReplacementRange.getLength() << ":\"" << ReplacementText << "\"";
return Stream.str();
}
namespace clang {
namespace tooling {
bool operator<(const Replacement &LHS, const Replacement &RHS) {
if (LHS.getOffset() != RHS.getOffset())
return LHS.getOffset() < RHS.getOffset();
if (LHS.getLength() != RHS.getLength())
return LHS.getLength() < RHS.getLength();
if (LHS.getFilePath() != RHS.getFilePath())
return LHS.getFilePath() < RHS.getFilePath();
return LHS.getReplacementText() < RHS.getReplacementText();
}
bool operator==(const Replacement &LHS, const Replacement &RHS) {
return LHS.getOffset() == RHS.getOffset() &&
LHS.getLength() == RHS.getLength() &&
LHS.getFilePath() == RHS.getFilePath() &&
LHS.getReplacementText() == RHS.getReplacementText();
}
} // namespace tooling
} // namespace clang
void Replacement::setFromSourceLocation(const SourceManager &Sources,
SourceLocation Start, unsigned Length,
StringRef ReplacementText) {
const std::pair<FileID, unsigned> DecomposedLocation =
Sources.getDecomposedLoc(Start);
const FileEntry *Entry = Sources.getFileEntryForID(DecomposedLocation.first);
this->FilePath = std::string(Entry ? Entry->getName() : InvalidLocation);
this->ReplacementRange = Range(DecomposedLocation.second, Length);
this->ReplacementText = std::string(ReplacementText);
}
// FIXME: This should go into the Lexer, but we need to figure out how
// to handle ranges for refactoring in general first - there is no obvious
// good way how to integrate this into the Lexer yet.
static int getRangeSize(const SourceManager &Sources,
const CharSourceRange &Range,
const LangOptions &LangOpts) {
SourceLocation SpellingBegin = Sources.getSpellingLoc(Range.getBegin());
SourceLocation SpellingEnd = Sources.getSpellingLoc(Range.getEnd());
std::pair<FileID, unsigned> Start = Sources.getDecomposedLoc(SpellingBegin);
std::pair<FileID, unsigned> End = Sources.getDecomposedLoc(SpellingEnd);
if (Start.first != End.first) return -1;
if (Range.isTokenRange())
End.second += Lexer::MeasureTokenLength(SpellingEnd, Sources, LangOpts);
return End.second - Start.second;
}
void Replacement::setFromSourceRange(const SourceManager &Sources,
const CharSourceRange &Range,
StringRef ReplacementText,
const LangOptions &LangOpts) {
setFromSourceLocation(Sources, Sources.getSpellingLoc(Range.getBegin()),
getRangeSize(Sources, Range, LangOpts),
ReplacementText);
}
Replacement
Replacements::getReplacementInChangedCode(const Replacement &R) const {
unsigned NewStart = getShiftedCodePosition(R.getOffset());
unsigned NewEnd = getShiftedCodePosition(R.getOffset() + R.getLength());
return Replacement(R.getFilePath(), NewStart, NewEnd - NewStart,
R.getReplacementText());
}
static std::string getReplacementErrString(replacement_error Err) {
switch (Err) {
case replacement_error::fail_to_apply:
return "Failed to apply a replacement.";
case replacement_error::wrong_file_path:
return "The new replacement's file path is different from the file path of "
"existing replacements";
case replacement_error::overlap_conflict:
return "The new replacement overlaps with an existing replacement.";
case replacement_error::insert_conflict:
return "The new insertion has the same insert location as an existing "
"replacement.";
}
llvm_unreachable("A value of replacement_error has no message.");
}
std::string ReplacementError::message() const {
std::string Message = getReplacementErrString(Err);
if (NewReplacement.hasValue())
Message += "\nNew replacement: " + NewReplacement->toString();
if (ExistingReplacement.hasValue())
Message += "\nExisting replacement: " + ExistingReplacement->toString();
return Message;
}
char ReplacementError::ID = 0;
Replacements Replacements::getCanonicalReplacements() const {
std::vector<Replacement> NewReplaces;
// Merge adjacent replacements.
for (const auto &R : Replaces) {
if (NewReplaces.empty()) {
NewReplaces.push_back(R);
continue;
}
auto &Prev = NewReplaces.back();
unsigned PrevEnd = Prev.getOffset() + Prev.getLength();
if (PrevEnd < R.getOffset()) {
NewReplaces.push_back(R);
} else {
assert(PrevEnd == R.getOffset() &&
"Existing replacements must not overlap.");
Replacement NewR(
R.getFilePath(), Prev.getOffset(), Prev.getLength() + R.getLength(),
(Prev.getReplacementText() + R.getReplacementText()).str());
Prev = NewR;
}
}
ReplacementsImpl NewReplacesImpl(NewReplaces.begin(), NewReplaces.end());
return Replacements(NewReplacesImpl.begin(), NewReplacesImpl.end());
}
// `R` and `Replaces` are order-independent if applying them in either order
// has the same effect, so we need to compare replacements associated to
// applying them in either order.
llvm::Expected<Replacements>
Replacements::mergeIfOrderIndependent(const Replacement &R) const {
Replacements Rs(R);
// A Replacements set containing a single replacement that is `R` referring to
// the code after the existing replacements `Replaces` are applied.
Replacements RsShiftedByReplaces(getReplacementInChangedCode(R));
// A Replacements set that is `Replaces` referring to the code after `R` is
// applied.
Replacements ReplacesShiftedByRs;
for (const auto &Replace : Replaces)
ReplacesShiftedByRs.Replaces.insert(
Rs.getReplacementInChangedCode(Replace));
// This is equivalent to applying `Replaces` first and then `R`.
auto MergeShiftedRs = merge(RsShiftedByReplaces);
// This is equivalent to applying `R` first and then `Replaces`.
auto MergeShiftedReplaces = Rs.merge(ReplacesShiftedByRs);
// Since empty or segmented replacements around existing replacements might be
// produced above, we need to compare replacements in canonical forms.
if (MergeShiftedRs.getCanonicalReplacements() ==
MergeShiftedReplaces.getCanonicalReplacements())
return MergeShiftedRs;
return llvm::make_error<ReplacementError>(replacement_error::overlap_conflict,
R, *Replaces.begin());
}
llvm::Error Replacements::add(const Replacement &R) {
// Check the file path.
if (!Replaces.empty() && R.getFilePath() != Replaces.begin()->getFilePath())
return llvm::make_error<ReplacementError>(
replacement_error::wrong_file_path, R, *Replaces.begin());
// Special-case header insertions.
if (R.getOffset() == std::numeric_limits<unsigned>::max()) {
Replaces.insert(R);
return llvm::Error::success();
}
// This replacement cannot conflict with replacements that end before
// this replacement starts or start after this replacement ends.
// We also know that there currently are no overlapping replacements.
// Thus, we know that all replacements that start after the end of the current
// replacement cannot overlap.
Replacement AtEnd(R.getFilePath(), R.getOffset() + R.getLength(), 0, "");
// Find the first entry that starts after or at the end of R. Note that
// entries that start at the end can still be conflicting if R is an
// insertion.
auto I = Replaces.lower_bound(AtEnd);
// If `I` starts at the same offset as `R`, `R` must be an insertion.
if (I != Replaces.end() && R.getOffset() == I->getOffset()) {
assert(R.getLength() == 0);
// `I` is also an insertion, `R` and `I` conflict.
if (I->getLength() == 0) {
// Check if two insertions are order-indepedent: if inserting them in
// either order produces the same text, they are order-independent.
if ((R.getReplacementText() + I->getReplacementText()).str() !=
(I->getReplacementText() + R.getReplacementText()).str())
return llvm::make_error<ReplacementError>(
replacement_error::insert_conflict, R, *I);
// If insertions are order-independent, we can merge them.
Replacement NewR(
R.getFilePath(), R.getOffset(), 0,
(R.getReplacementText() + I->getReplacementText()).str());
Replaces.erase(I);
Replaces.insert(std::move(NewR));
return llvm::Error::success();
}
// Insertion `R` is adjacent to a non-insertion replacement `I`, so they
// are order-independent. It is safe to assume that `R` will not conflict
// with any replacement before `I` since all replacements before `I` must
// either end before `R` or end at `R` but has length > 0 (if the
// replacement before `I` is an insertion at `R`, it would have been `I`
// since it is a lower bound of `AtEnd` and ordered before the current `I`
// in the set).
Replaces.insert(R);
return llvm::Error::success();
}
// `I` is the smallest iterator (after `R`) whose entry cannot overlap.
// If that is begin(), there are no overlaps.
if (I == Replaces.begin()) {
Replaces.insert(R);
return llvm::Error::success();
}
--I;
auto Overlap = [](const Replacement &R1, const Replacement &R2) -> bool {
return Range(R1.getOffset(), R1.getLength())
.overlapsWith(Range(R2.getOffset(), R2.getLength()));
};
// If the previous entry does not overlap, we know that entries before it
// can also not overlap.
if (!Overlap(R, *I)) {
// If `R` and `I` do not have the same offset, it is safe to add `R` since
// it must come after `I`. Otherwise:
// - If `R` is an insertion, `I` must not be an insertion since it would
// have come after `AtEnd`.
// - If `R` is not an insertion, `I` must be an insertion; otherwise, `R`
// and `I` would have overlapped.
// In either case, we can safely insert `R`.
Replaces.insert(R);
} else {
// `I` overlaps with `R`. We need to check `R` against all overlapping
// replacements to see if they are order-indepedent. If they are, merge `R`
// with them and replace them with the merged replacements.
auto MergeBegin = I;
auto MergeEnd = std::next(I);
while (I != Replaces.begin()) {
--I;
// If `I` doesn't overlap with `R`, don't merge it.
if (!Overlap(R, *I))
break;
MergeBegin = I;
}
Replacements OverlapReplaces(MergeBegin, MergeEnd);
llvm::Expected<Replacements> Merged =
OverlapReplaces.mergeIfOrderIndependent(R);
if (!Merged)
return Merged.takeError();
Replaces.erase(MergeBegin, MergeEnd);
Replaces.insert(Merged->begin(), Merged->end());
}
return llvm::Error::success();
}
namespace {
// Represents a merged replacement, i.e. a replacement consisting of multiple
// overlapping replacements from 'First' and 'Second' in mergeReplacements.
//
// Position projection:
// Offsets and lengths of the replacements can generally refer to two different
// coordinate spaces. Replacements from 'First' refer to the original text
// whereas replacements from 'Second' refer to the text after applying 'First'.
//
// MergedReplacement always operates in the coordinate space of the original
// text, i.e. transforms elements from 'Second' to take into account what was
// changed based on the elements from 'First'.
//
// We can correctly calculate this projection as we look at the replacements in
// order of strictly increasing offsets.
//
// Invariants:
// * We always merge elements from 'First' into elements from 'Second' and vice
// versa. Within each set, the replacements are non-overlapping.
// * We only extend to the right, i.e. merge elements with strictly increasing
// offsets.
class MergedReplacement {
public:
MergedReplacement(const Replacement &R, bool MergeSecond, int D)
: MergeSecond(MergeSecond), Delta(D), FilePath(R.getFilePath()),
Offset(R.getOffset() + (MergeSecond ? 0 : Delta)),
Length(R.getLength()), Text(std::string(R.getReplacementText())) {
Delta += MergeSecond ? 0 : Text.size() - Length;
DeltaFirst = MergeSecond ? Text.size() - Length : 0;
}
// Merges the next element 'R' into this merged element. As we always merge
// from 'First' into 'Second' or vice versa, the MergedReplacement knows what
// set the next element is coming from.
void merge(const Replacement &R) {
if (MergeSecond) {
unsigned REnd = R.getOffset() + Delta + R.getLength();
unsigned End = Offset + Text.size();
if (REnd > End) {
Length += REnd - End;
MergeSecond = false;
}
StringRef TextRef = Text;
StringRef Head = TextRef.substr(0, R.getOffset() + Delta - Offset);
StringRef Tail = TextRef.substr(REnd - Offset);
Text = (Head + R.getReplacementText() + Tail).str();
Delta += R.getReplacementText().size() - R.getLength();
} else {
unsigned End = Offset + Length;
StringRef RText = R.getReplacementText();
StringRef Tail = RText.substr(End - R.getOffset());
Text = (Text + Tail).str();
if (R.getOffset() + RText.size() > End) {
Length = R.getOffset() + R.getLength() - Offset;
MergeSecond = true;
} else {
Length += R.getLength() - RText.size();
}
DeltaFirst += RText.size() - R.getLength();
}
}
// Returns 'true' if 'R' starts strictly after the MergedReplacement and thus
// doesn't need to be merged.
bool endsBefore(const Replacement &R) const {
if (MergeSecond)
return Offset + Text.size() < R.getOffset() + Delta;
return Offset + Length < R.getOffset();
}
// Returns 'true' if an element from the second set should be merged next.
bool mergeSecond() const { return MergeSecond; }
int deltaFirst() const { return DeltaFirst; }
Replacement asReplacement() const { return {FilePath, Offset, Length, Text}; }
private:
bool MergeSecond;
// Amount of characters that elements from 'Second' need to be shifted by in
// order to refer to the original text.
int Delta;
// Sum of all deltas (text-length - length) of elements from 'First' merged
// into this element. This is used to update 'Delta' once the
// MergedReplacement is completed.
int DeltaFirst;
// Data of the actually merged replacement. FilePath and Offset aren't changed
// as the element is only extended to the right.
const StringRef FilePath;
const unsigned Offset;
unsigned Length;
std::string Text;
};
} // namespace
Replacements Replacements::merge(const Replacements &ReplacesToMerge) const {
if (empty() || ReplacesToMerge.empty())
return empty() ? ReplacesToMerge : *this;
auto &First = Replaces;
auto &Second = ReplacesToMerge.Replaces;
// Delta is the amount of characters that replacements from 'Second' need to
// be shifted so that their offsets refer to the original text.
int Delta = 0;
ReplacementsImpl Result;
// Iterate over both sets and always add the next element (smallest total
// Offset) from either 'First' or 'Second'. Merge that element with
// subsequent replacements as long as they overlap. See more details in the
// comment on MergedReplacement.
for (auto FirstI = First.begin(), SecondI = Second.begin();
FirstI != First.end() || SecondI != Second.end();) {
bool NextIsFirst = SecondI == Second.end() ||
(FirstI != First.end() &&
FirstI->getOffset() < SecondI->getOffset() + Delta);
MergedReplacement Merged(NextIsFirst ? *FirstI : *SecondI, NextIsFirst,
Delta);
++(NextIsFirst ? FirstI : SecondI);
while ((Merged.mergeSecond() && SecondI != Second.end()) ||
(!Merged.mergeSecond() && FirstI != First.end())) {
auto &I = Merged.mergeSecond() ? SecondI : FirstI;
if (Merged.endsBefore(*I))
break;
Merged.merge(*I);
++I;
}
Delta -= Merged.deltaFirst();
Result.insert(Merged.asReplacement());
}
return Replacements(Result.begin(), Result.end());
}
// Combines overlapping ranges in \p Ranges and sorts the combined ranges.
// Returns a set of non-overlapping and sorted ranges that is equivalent to
// \p Ranges.
static std::vector<Range> combineAndSortRanges(std::vector<Range> Ranges) {
llvm::sort(Ranges, [](const Range &LHS, const Range &RHS) {
if (LHS.getOffset() != RHS.getOffset())
return LHS.getOffset() < RHS.getOffset();
return LHS.getLength() < RHS.getLength();
});
std::vector<Range> Result;
for (const auto &R : Ranges) {
if (Result.empty() ||
Result.back().getOffset() + Result.back().getLength() < R.getOffset()) {
Result.push_back(R);
} else {
unsigned NewEnd =
std::max(Result.back().getOffset() + Result.back().getLength(),
R.getOffset() + R.getLength());
Result[Result.size() - 1] =
Range(Result.back().getOffset(), NewEnd - Result.back().getOffset());
}
}
return Result;
}
namespace clang {
namespace tooling {
std::vector<Range>
calculateRangesAfterReplacements(const Replacements &Replaces,
const std::vector<Range> &Ranges) {
// To calculate the new ranges,
// - Turn \p Ranges into Replacements at (offset, length) with an empty
// (unimportant) replacement text of length "length".
// - Merge with \p Replaces.
// - The new ranges will be the affected ranges of the merged replacements.
auto MergedRanges = combineAndSortRanges(Ranges);
if (Replaces.empty())
return MergedRanges;
tooling::Replacements FakeReplaces;
for (const auto &R : MergedRanges) {
llvm::cantFail(
FakeReplaces.add(Replacement(Replaces.begin()->getFilePath(),
R.getOffset(), R.getLength(),
std::string(R.getLength(), ' '))),
"Replacements must not conflict since ranges have been merged.");
}
return FakeReplaces.merge(Replaces).getAffectedRanges();
}
} // namespace tooling
} // namespace clang
std::vector<Range> Replacements::getAffectedRanges() const {
std::vector<Range> ChangedRanges;
int Shift = 0;
for (const auto &R : Replaces) {
unsigned Offset = R.getOffset() + Shift;
unsigned Length = R.getReplacementText().size();
Shift += Length - R.getLength();
ChangedRanges.push_back(Range(Offset, Length));
}
return combineAndSortRanges(ChangedRanges);
}
unsigned Replacements::getShiftedCodePosition(unsigned Position) const {
unsigned Offset = 0;
for (const auto &R : Replaces) {
if (R.getOffset() + R.getLength() <= Position) {
Offset += R.getReplacementText().size() - R.getLength();
continue;
}
if (R.getOffset() < Position &&
R.getOffset() + R.getReplacementText().size() <= Position) {
Position = R.getOffset() + R.getReplacementText().size();
if (!R.getReplacementText().empty())
Position--;
}
break;
}
return Position + Offset;
}
namespace clang {
namespace tooling {
bool applyAllReplacements(const Replacements &Replaces, Rewriter &Rewrite) {
bool Result = true;
for (auto I = Replaces.rbegin(), E = Replaces.rend(); I != E; ++I) {
if (I->isApplicable()) {
Result = I->apply(Rewrite) && Result;
} else {
Result = false;
}
}
return Result;
}
llvm::Expected<std::string> applyAllReplacements(StringRef Code,
const Replacements &Replaces) {
if (Replaces.empty())
return Code.str();
IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
new llvm::vfs::InMemoryFileSystem);
FileManager Files(FileSystemOptions(), InMemoryFileSystem);
DiagnosticsEngine Diagnostics(
IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
new DiagnosticOptions);
SourceManager SourceMgr(Diagnostics, Files);
Rewriter Rewrite(SourceMgr, LangOptions());
InMemoryFileSystem->addFile(
"<stdin>", 0, llvm::MemoryBuffer::getMemBuffer(Code, "<stdin>"));
FileID ID = SourceMgr.createFileID(*Files.getFile("<stdin>"),
SourceLocation(),
clang::SrcMgr::C_User);
for (auto I = Replaces.rbegin(), E = Replaces.rend(); I != E; ++I) {
Replacement Replace("<stdin>", I->getOffset(), I->getLength(),
I->getReplacementText());
if (!Replace.apply(Rewrite))
return llvm::make_error<ReplacementError>(
replacement_error::fail_to_apply, Replace);
}
std::string Result;
llvm::raw_string_ostream OS(Result);
Rewrite.getEditBuffer(ID).write(OS);
OS.flush();
return Result;
}
std::map<std::string, Replacements> groupReplacementsByFile(
FileManager &FileMgr,
const std::map<std::string, Replacements> &FileToReplaces) {
std::map<std::string, Replacements> Result;
llvm::SmallPtrSet<const FileEntry *, 16> ProcessedFileEntries;
for (const auto &Entry : FileToReplaces) {
auto FE = FileMgr.getFile(Entry.first);
if (!FE)
llvm::errs() << "File path " << Entry.first << " is invalid.\n";
else if (ProcessedFileEntries.insert(*FE).second)
Result[Entry.first] = std::move(Entry.second);
}
return Result;
}
} // namespace tooling
} // namespace clang