//===- Lexer.h - MLIR PDLL Frontend Lexer -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LIB_TOOLS_PDLL_PARSER_LEXER_H_
|
|
#define LIB_TOOLS_PDLL_PARSER_LEXER_H_
|
|
|
|
#include "mlir/Support/LLVM.h"
|
|
#include "llvm/ADT/StringRef.h"
|
|
#include "llvm/Support/SMLoc.h"
|
|
|
|
namespace llvm {
// Forward declaration: this header only refers to SourceMgr by reference, so
// the full definition is not needed here.
class SourceMgr;
} // namespace llvm
|
|
|
|
namespace mlir {
|
|
// Forward declaration; within this header it appears only as the return type of
// Lexer::pushInclude.
struct LogicalResult;
|
|
|
|
namespace pdll {
|
|
// Forward declaration; within this header it is only passed by pointer to the
// Lexer constructor.
class CodeCompleteContext;
|
|
|
|
namespace ast {
// Forward declaration: this header only refers to DiagnosticEngine by
// reference, so the full definition is not needed here.
class DiagnosticEngine;
} // namespace ast
|
|
|
|
//===----------------------------------------------------------------------===//
// Token
//===----------------------------------------------------------------------===//
|
|
|
|
class Token {
|
|
public:
|
|
enum Kind {
|
|
/// Markers.
|
|
eof,
|
|
error,
|
|
/// Token signifying a code completion location.
|
|
code_complete,
|
|
/// Token signifying a code completion location within a string.
|
|
code_complete_string,
|
|
|
|
/// Keywords.
|
|
KW_BEGIN,
|
|
/// Dependent keywords, i.e. those that are treated as keywords depending on
|
|
/// the current parser context.
|
|
KW_DEPENDENT_BEGIN,
|
|
kw_attr,
|
|
kw_op,
|
|
kw_type,
|
|
KW_DEPENDENT_END,
|
|
|
|
/// General keywords.
|
|
kw_Attr,
|
|
kw_erase,
|
|
kw_let,
|
|
kw_Constraint,
|
|
kw_Op,
|
|
kw_OpName,
|
|
kw_Pattern,
|
|
kw_replace,
|
|
kw_return,
|
|
kw_rewrite,
|
|
kw_Rewrite,
|
|
kw_Type,
|
|
kw_TypeRange,
|
|
kw_Value,
|
|
kw_ValueRange,
|
|
kw_with,
|
|
KW_END,
|
|
|
|
/// Punctuation.
|
|
arrow,
|
|
colon,
|
|
comma,
|
|
dot,
|
|
equal,
|
|
equal_arrow,
|
|
semicolon,
|
|
/// Paired punctuation.
|
|
less,
|
|
greater,
|
|
l_brace,
|
|
r_brace,
|
|
l_paren,
|
|
r_paren,
|
|
l_square,
|
|
r_square,
|
|
underscore,
|
|
|
|
/// Tokens.
|
|
directive,
|
|
identifier,
|
|
integer,
|
|
string_block,
|
|
string
|
|
};
|
|
Token(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {}
|
|
|
|
/// Given a token containing a string literal, return its value, including
|
|
/// removing the quote characters and unescaping the contents of the string.
|
|
std::string getStringValue() const;
|
|
|
|
/// Returns true if the current token is a string literal.
|
|
bool isString() const { return isAny(Token::string, Token::string_block); }
|
|
|
|
/// Returns true if the current token is a keyword.
|
|
bool isKeyword() const {
|
|
return kind > Token::KW_BEGIN && kind < Token::KW_END;
|
|
}
|
|
|
|
/// Returns true if the current token is a keyword in a dependent context, and
|
|
/// in any other situation (e.g. variable names) may be treated as an
|
|
/// identifier.
|
|
bool isDependentKeyword() const {
|
|
return kind > Token::KW_DEPENDENT_BEGIN && kind < Token::KW_DEPENDENT_END;
|
|
}
|
|
|
|
/// Return the bytes that make up this token.
|
|
StringRef getSpelling() const { return spelling; }
|
|
|
|
/// Return the kind of this token.
|
|
Kind getKind() const { return kind; }
|
|
|
|
/// Return true if this token is one of the specified kinds.
|
|
bool isAny(Kind k1, Kind k2) const { return is(k1) || is(k2); }
|
|
template <typename... T>
|
|
bool isAny(Kind k1, Kind k2, Kind k3, T... others) const {
|
|
return is(k1) || isAny(k2, k3, others...);
|
|
}
|
|
|
|
/// Return if the token does not have the given kind.
|
|
bool isNot(Kind k) const { return k != kind; }
|
|
template <typename... T>
|
|
bool isNot(Kind k1, Kind k2, T... others) const {
|
|
return !isAny(k1, k2, others...);
|
|
}
|
|
|
|
/// Return if the token has the given kind.
|
|
bool is(Kind k) const { return kind == k; }
|
|
|
|
/// Return a location for the start of this token.
|
|
SMLoc getStartLoc() const { return SMLoc::getFromPointer(spelling.data()); }
|
|
/// Return a location at the end of this token.
|
|
SMLoc getEndLoc() const {
|
|
return SMLoc::getFromPointer(spelling.data() + spelling.size());
|
|
}
|
|
/// Return a location for the range of this token.
|
|
SMRange getLoc() const { return SMRange(getStartLoc(), getEndLoc()); }
|
|
|
|
private:
|
|
/// Discriminator that indicates the kind of token this is.
|
|
Kind kind;
|
|
|
|
/// A reference to the entire token contents; this is always a pointer into
|
|
/// a memory buffer owned by the source manager.
|
|
StringRef spelling;
|
|
};
|
|
|
|
//===----------------------------------------------------------------------===//
// Lexer
//===----------------------------------------------------------------------===//
|
|
|
|
class Lexer {
|
|
public:
|
|
Lexer(llvm::SourceMgr &mgr, ast::DiagnosticEngine &diagEngine,
|
|
CodeCompleteContext *codeCompleteContext);
|
|
~Lexer();
|
|
|
|
/// Return a reference to the source manager used by the lexer.
|
|
llvm::SourceMgr &getSourceMgr() { return srcMgr; }
|
|
|
|
/// Return a reference to the diagnostic engine used by the lexer.
|
|
ast::DiagnosticEngine &getDiagEngine() { return diagEngine; }
|
|
|
|
/// Push an include of the given file. This will cause the lexer to start
|
|
/// processing the provided file. Returns failure if the file could not be
|
|
/// opened, success otherwise.
|
|
LogicalResult pushInclude(StringRef filename, SMRange includeLoc);
|
|
|
|
/// Lex the next token and return it.
|
|
Token lexToken();
|
|
|
|
/// Change the position of the lexer cursor. The next token we lex will start
|
|
/// at the designated point in the input.
|
|
void resetPointer(const char *newPointer) { curPtr = newPointer; }
|
|
|
|
/// Emit an error to the lexer with the given location and message.
|
|
Token emitError(SMRange loc, const Twine &msg);
|
|
Token emitError(const char *loc, const Twine &msg);
|
|
Token emitErrorAndNote(SMRange loc, const Twine &msg, SMRange noteLoc,
|
|
const Twine ¬e);
|
|
|
|
private:
|
|
Token formToken(Token::Kind kind, const char *tokStart) {
|
|
return Token(kind, StringRef(tokStart, curPtr - tokStart));
|
|
}
|
|
|
|
/// Return the next character in the stream.
|
|
int getNextChar();
|
|
|
|
/// Lex methods.
|
|
void lexComment();
|
|
Token lexDirective(const char *tokStart);
|
|
Token lexIdentifier(const char *tokStart);
|
|
Token lexNumber(const char *tokStart);
|
|
Token lexString(const char *tokStart, bool isStringBlock);
|
|
|
|
llvm::SourceMgr &srcMgr;
|
|
int curBufferID;
|
|
StringRef curBuffer;
|
|
const char *curPtr;
|
|
|
|
/// The engine used to emit diagnostics during lexing/parsing.
|
|
ast::DiagnosticEngine &diagEngine;
|
|
|
|
/// A flag indicating if we added a default diagnostic handler to the provided
|
|
/// diagEngine.
|
|
bool addedHandlerToDiagEngine;
|
|
|
|
/// The optional code completion point within the input file.
|
|
const char *codeCompletionLocation;
|
|
};
|
|
} // namespace pdll
|
|
} // namespace mlir
|
|
|
|
#endif // LIB_TOOLS_PDLL_PARSER_LEXER_H_
|