206 lines
6.5 KiB
C++
206 lines
6.5 KiB
C++
//===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
///
|
|
/// \file
|
|
/// This file defines the log symbolizer markup data model and parser.
|
|
///
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/DebugInfo/Symbolize/Markup.h"
|
|
|
|
#include "llvm/ADT/STLExtras.h"
|
|
#include "llvm/ADT/StringExtras.h"
|
|
|
|
namespace llvm {
|
|
namespace symbolize {
|
|
|
|
// Matches the following:
|
|
// "\033[0m"
|
|
// "\033[1m"
|
|
// "\033[30m" -- "\033[37m"
|
|
static const char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
|
|
|
|
MarkupParser::MarkupParser(StringSet<> MultilineTags)
|
|
: MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {}
|
|
|
|
static StringRef takeTo(StringRef Str, StringRef::iterator Pos) {
|
|
return Str.take_front(Pos - Str.begin());
|
|
}
|
|
static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
|
|
Str = Str.drop_front(Pos - Str.begin());
|
|
}
|
|
|
|
void MarkupParser::parseLine(StringRef Line) {
|
|
Buffer.clear();
|
|
NextIdx = 0;
|
|
FinishedMultiline.clear();
|
|
this->Line = Line;
|
|
}
|
|
|
|
std::optional<MarkupNode> MarkupParser::nextNode() {
|
|
// Pull something out of the buffer if possible.
|
|
if (!Buffer.empty()) {
|
|
if (NextIdx < Buffer.size())
|
|
return std::move(Buffer[NextIdx++]);
|
|
NextIdx = 0;
|
|
Buffer.clear();
|
|
}
|
|
|
|
// The buffer is empty, so parse the next bit of the line.
|
|
|
|
if (Line.empty())
|
|
return std::nullopt;
|
|
|
|
if (!InProgressMultiline.empty()) {
|
|
if (std::optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
|
|
llvm::append_range(InProgressMultiline, *MultilineEnd);
|
|
assert(FinishedMultiline.empty() &&
|
|
"At most one multi-line element can be finished at a time.");
|
|
FinishedMultiline.swap(InProgressMultiline);
|
|
// Parse the multi-line element as if it were contiguous.
|
|
advanceTo(Line, MultilineEnd->end());
|
|
return *parseElement(FinishedMultiline);
|
|
}
|
|
|
|
// The whole line is part of the multi-line element.
|
|
llvm::append_range(InProgressMultiline, Line);
|
|
Line = Line.drop_front(Line.size());
|
|
return std::nullopt;
|
|
}
|
|
|
|
// Find the first valid markup element, if any.
|
|
if (std::optional<MarkupNode> Element = parseElement(Line)) {
|
|
parseTextOutsideMarkup(takeTo(Line, Element->Text.begin()));
|
|
Buffer.push_back(std::move(*Element));
|
|
advanceTo(Line, Element->Text.end());
|
|
return nextNode();
|
|
}
|
|
|
|
// Since there were no valid elements remaining, see if the line opens a
|
|
// multi-line element.
|
|
if (std::optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
|
|
// Emit any text before the element.
|
|
parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin()));
|
|
|
|
// Begin recording the multi-line element.
|
|
llvm::append_range(InProgressMultiline, *MultilineBegin);
|
|
Line = Line.drop_front(Line.size());
|
|
return nextNode();
|
|
}
|
|
|
|
// The line doesn't contain any more markup elements, so emit it as text.
|
|
parseTextOutsideMarkup(Line);
|
|
Line = Line.drop_front(Line.size());
|
|
return nextNode();
|
|
}
|
|
|
|
void MarkupParser::flush() {
|
|
Buffer.clear();
|
|
NextIdx = 0;
|
|
Line = {};
|
|
if (InProgressMultiline.empty())
|
|
return;
|
|
FinishedMultiline.swap(InProgressMultiline);
|
|
parseTextOutsideMarkup(FinishedMultiline);
|
|
}
|
|
|
|
// Finds and returns the next valid markup element in the given line. Returns
|
|
// std::nullopt if the line contains no valid elements.
|
|
std::optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
|
|
while (true) {
|
|
// Find next element using begin and end markers.
|
|
size_t BeginPos = Line.find("{{{");
|
|
if (BeginPos == StringRef::npos)
|
|
return std::nullopt;
|
|
size_t EndPos = Line.find("}}}", BeginPos + 3);
|
|
if (EndPos == StringRef::npos)
|
|
return std::nullopt;
|
|
EndPos += 3;
|
|
MarkupNode Element;
|
|
Element.Text = Line.slice(BeginPos, EndPos);
|
|
Line = Line.substr(EndPos);
|
|
|
|
// Parse tag.
|
|
StringRef Content = Element.Text.drop_front(3).drop_back(3);
|
|
StringRef FieldsContent;
|
|
std::tie(Element.Tag, FieldsContent) = Content.split(':');
|
|
if (Element.Tag.empty())
|
|
continue;
|
|
|
|
// Parse fields.
|
|
if (!FieldsContent.empty())
|
|
FieldsContent.split(Element.Fields, ":");
|
|
else if (Content.back() == ':')
|
|
Element.Fields.push_back(FieldsContent);
|
|
|
|
return Element;
|
|
}
|
|
}
|
|
|
|
static MarkupNode textNode(StringRef Text) {
|
|
MarkupNode Node;
|
|
Node.Text = Text;
|
|
return Node;
|
|
}
|
|
|
|
// Parses a region of text known to be outside any markup elements. Such text
|
|
// may still contain SGR control codes, so the region is further subdivided into
|
|
// control codes and true text regions.
|
|
void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
|
|
if (Text.empty())
|
|
return;
|
|
SmallVector<StringRef> Matches;
|
|
while (SGRSyntax.match(Text, &Matches)) {
|
|
// Emit any text before the SGR element.
|
|
if (Matches.begin()->begin() != Text.begin())
|
|
Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin())));
|
|
|
|
Buffer.push_back(textNode(*Matches.begin()));
|
|
advanceTo(Text, Matches.begin()->end());
|
|
}
|
|
if (!Text.empty())
|
|
Buffer.push_back(textNode(Text));
|
|
}
|
|
|
|
// Given that a line doesn't contain any valid markup, see if it ends with the
|
|
// start of a multi-line element. If so, returns the beginning.
|
|
std::optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
|
|
// A multi-line begin marker must be the last one on the line.
|
|
size_t BeginPos = Line.rfind("{{{");
|
|
if (BeginPos == StringRef::npos)
|
|
return std::nullopt;
|
|
size_t BeginTagPos = BeginPos + 3;
|
|
|
|
// If there are any end markers afterwards, the begin marker cannot belong to
|
|
// a multi-line element.
|
|
size_t EndPos = Line.find("}}}", BeginTagPos);
|
|
if (EndPos != StringRef::npos)
|
|
return std::nullopt;
|
|
|
|
// Check whether the tag is registered multi-line.
|
|
size_t EndTagPos = Line.find(':', BeginTagPos);
|
|
if (EndTagPos == StringRef::npos)
|
|
return std::nullopt;
|
|
StringRef Tag = Line.slice(BeginTagPos, EndTagPos);
|
|
if (!MultilineTags.contains(Tag))
|
|
return std::nullopt;
|
|
return Line.substr(BeginPos);
|
|
}
|
|
|
|
// See if the line begins with the ending of an in-progress multi-line element.
|
|
// If so, return the ending.
|
|
std::optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
|
|
size_t EndPos = Line.find("}}}");
|
|
if (EndPos == StringRef::npos)
|
|
return std::nullopt;
|
|
return Line.take_front(EndPos + 3);
|
|
}
|
|
|
|
} // end namespace symbolize
|
|
} // end namespace llvm
|