#include "Parser.hpp"
#include "Ast.hpp"
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstddef>
#include <format>
using namespace parsing;

/// Builds a parse error carrying `message` (forwarded to std::logic_error)
/// and the source position it refers to.
ParseError::ParseError(ast::Position position, std::string message)
    : std::logic_error{message}, position(position)
{
}

/// Binds the parser to `stream`; the lookahead buffer starts empty and no
/// error is recorded.
BasicParser::BasicParser(std::istream &stream)
    : in(stream), buffer{}, continue_line{}, error{}
{
}

/// Same as BasicParser, with the arithmetic parenthesis nesting counter at 0.
Parser::Parser(std::istream &stream)
    : BasicParser{stream}, arith_nested_parens{0}
{
}

/// Returns the character `index` positions ahead of the read cursor without
/// consuming it, pulling more input from the stream when necessary.
///
/// The lookahead buffer is kept in REVERSE order: `buffer.back()` is the next
/// character to be read and `buffer.front()` is the furthest one peeked.
///
/// @param index 0 for the next character, 1 for the one after it, ...
/// @return the requested character, or END when the stream cannot supply
///         enough characters (whatever was read stays buffered).
char BasicParser::peekChar(const std::size_t index)
{
    const std::size_t old_size = buffer.size();
    if (index < old_size) {
        return buffer[buffer.size() - index - 1];
    }

    const std::size_t needed_chars = index + 1;
    const std::size_t chars_to_read = needed_chars - old_size;

    buffer.resize(needed_chars);

    // Slide the already-buffered characters to the tail so that the newly
    // read (further ahead) characters can be stored in front of them.
    if (old_size > 0)
        std::copy_backward(buffer.begin(), buffer.begin() + old_size,
                           buffer.end());

    char ch{};
    for (std::size_t i = 0; i < chars_to_read; ++i) {
        if (!in.get(ch)) {
            // Input ran out: drop the slots at the front that were reserved
            // but never filled, keeping the `i` characters that were read.
            buffer.erase(buffer.begin(),
                         buffer.begin() + (chars_to_read - i));
            return END;
        }
        buffer[chars_to_read - 1 - i] = ch;
    }

    return *(buffer.end() - index - 1);
}

/// Consumes and returns the next character, updating `current_position`
/// (a newline advances the line, anything else advances the column).
///
/// @return the consumed character, or END when no input is left.
char BasicParser::readChar(void)
{
    continue_line = false;

    if (buffer.size() == 0)
        peekChar(0); // try to fetch one character from the stream
    if (buffer.size() == 0)
        return END;

    const char ch = buffer.back();
    buffer.pop_back();
    if (ch == '\n')
        current_position.addLine();
    else
        current_position.addCol();

    return ch;
}

/// Consumes exactly `len` characters and returns them in reading order,
/// updating `current_position` for each one.
///
/// @param len number of characters to consume; 0 yields an empty string.
/// @return the consumed text, or an empty optional (nothing consumed) when
///         fewer than `len` characters are available.
std::optional<std::string> BasicParser::readString(std::size_t len)
{
    continue_line = false;

    // Guard: `len - 1` would wrap around to SIZE_MAX and make peekChar()
    // compute a needed size of 0, wiping the lookahead buffer.
    if (len == 0)
        return std::string{};

    peekChar(len - 1);
    if (buffer.size() < len)
        return {};

    // The buffer is stored reversed, so reading it back-to-front yields the
    // characters in input order.
    std::string str{buffer.rbegin(), buffer.rbegin() + len};
    buffer.resize(buffer.size() - len);

    for (const char ch : str) {
        if (ch == '\n')
            current_position.addLine();
        else
            current_position.addCol();
    }
    return str;
}

/// Peeks (without consuming) the run of plain word characters at the cursor.
///
/// The word stops before NUL, newline, ')', a blank, `end`, or any character
/// that starts an operator.
///
/// @param end additional character that terminates the word.
/// @return the word collected so far when a terminator is reached, or an
///         empty string when one of `$`, backquote, `'`, `"`, `\` is met
///         first (even if characters were already collected).
std::string BasicParser::peekWord(char end)
{
    std::string str{};

    for (std::size_t i = 0; true; i += 1) {
        const char ch = peekChar(i);
        switch (ch) {
        case '\0':
        case '\n':
        case ')':
            return str;
        case '$':
        case '`':
        case '\'':
        case '"':
        case '\\': // TODO: allow backslash in words
            return {};
        }
        // <cctype> functions require a value representable as unsigned char;
        // passing a negative plain char is undefined behaviour.
        if (std::isblank(static_cast<unsigned char>(ch)) || ch == end) {
            return str;
        }
        if (is_operator_start(ch))
            return str;
        str += ch;
    }
}

// TODO: optimize
/// Classifies what comes next in the input.
///
/// Leading blanks and `#` comments are CONSUMED as a side effect; the symbol
/// itself is only peeked. A comment runs up to, but does not include, the
/// following newline (or END).
///
/// @return SymbolType::end at end of input, SymbolType::newline on '\n',
///         the type of the first entry of `operators` whose text matches the
///         input, otherwise SymbolType::token.
SymbolType BasicParser::nextSymbol(void)
{
    while (true) {
        const char ch = peekChar(0);
        if (ch == END)
            return SymbolType::end;
        if (ch == '\n')
            return SymbolType::newline;

        if (std::isblank(static_cast<unsigned char>(ch))) {
            readChar();
            continue;
        }
        if (ch == '#') {
            // Drop the whole comment, '#' included. Peeking at offset 0 (not
            // 1) ensures the last comment character is consumed too and that
            // an empty comment ("#\n") still makes progress.
            for (char c = ch; c != END && c != '\n'; c = peekChar(0))
                readChar();
            continue;
        }
        break;
    }

    // Operators are tried in table order; the first full match wins.
    for (const auto &entry : operators) {
        const SymbolType type{entry.first};
        const std::string_view oper{entry.second};

        bool matched = true;
        for (std::size_t j = 0; j < oper.length(); j++) {
            const char next = peekChar(j);
            if (oper[j] != next || next == END) {
                matched = false;
                break;
            }
        }
        if (matched)
            return type;
    }
    return SymbolType::token;
}

/// Consumes `str` if the upcoming input matches it exactly.
///
/// @return true when `str` was matched and consumed; false (with an error
///         recorded via setError and nothing consumed) otherwise.
bool BasicParser::expect(const std::string str)
{
    for (std::size_t i = 0; i < str.length(); i++) {
        if (str[i] != peekChar(i)) {
            setError(std::format("expect string {}", str));
            return false;
        }
    }
    readString(str.length());
    return true;
}

/// Records a ParseError with message `msg` at the current position,
/// replacing any previously recorded error.
void BasicParser::setError(const std::string msg)
{
    error = std::make_shared<ParseError>(current_position, msg);
}

/// Discards the recorded error, if any.
void BasicParser::clearError() { error = nullptr; }

/// Peeks (without consuming) a name made of `[A-Za-z0-9_]` characters.
///
/// Blanks/comments in front of it are skipped by nextSymbol().
///
/// @param in_braces when false, a name may not start with a digit.
/// @return the name, or an empty optional when the next symbol is not a
///         token or no name character is found.
std::optional<std::string> Parser::peekName(bool in_braces)
{
    if (BasicParser::nextSymbol() != SymbolType::token)
        return {};

    std::string str{};
    for (std::size_t i = 0;; ++i) {
        const char c = peekChar(i);
        // Cast before calling <cctype>: negative plain chars are UB there.
        const auto uc = static_cast<unsigned char>(c);
        if (c != '_' && !std::isalnum(uc))
            break;
        if (i == 0 && std::isdigit(uc) && !in_braces)
            break;
        str += c;
    }

    if (str.empty())
        return {};
    return str;
}

/// Parses and consumes an I/O redirection operator.
///
/// The single-character forms `<` and `>` are tried first as tokens; the
/// remaining forms are recognised through nextSymbol().
///
/// @return the operator, or an empty optional when the input does not start
///         with a redirection operator.
std::optional<ast::IORedirectOp> Parser::ioRedirectOp()
{
    if (token<false>("<"))
        return ast::IORedirectOp::less;
    if (token<false>(">"))
        return ast::IORedirectOp::greate;

    const SymbolType next_sym = nextSymbol();
    ast::IORedirectOp op{};
    switch (next_sym) {
    case SymbolType::less_and:
        op = ast::IORedirectOp::less_and;
        break;
    case SymbolType::great_and:
        op = ast::IORedirectOp::great_and;
        break;
    case SymbolType::double_great:
        op = ast::IORedirectOp::double_great;
        break;
    case SymbolType::clobber:
        op = ast::IORedirectOp::clobber;
        break;
    case SymbolType::less_great:
        op = ast::IORedirectOp::less_great;
        break;
    case SymbolType::double_less:
        op = ast::IORedirectOp::double_less;
        break;
    case SymbolType::double_less_dash:
        op = ast::IORedirectOp::double_less_dash;
        break;
    default:
        return {};
    }

    // Only now is the operator text actually consumed.
    readString(operator_len(next_sym));
    return op;
}

/// Consumes consecutive newlines (blanks and comments between them are
/// skipped by nextSymbol()).
///
/// @return the number of newlines consumed.
int Parser::linebreak()
{
    int count{0};
    while (BasicParser::nextSymbol() == SymbolType::newline) {
        [[maybe_unused]] const char ch = readChar();
        assert(ch == '\n');
        count += 1;
    }
    return count;
}

/// Consumes a separator operator, `&` or `;`.
///
/// @return the separator character, or an empty optional when the next
///         symbol is not one.
std::optional<char> Parser::separatorOp()
{
    // NOTICE: nextSymbol() must run first: it skips blanks/comments and
    // rejects input that starts one of the entries in `operators`.
    if (BasicParser::nextSymbol() != SymbolType::token) {
        return {};
    }
    const char ch = peekChar(0);
    if (ch == '&' || ch == ';')
        return readChar();
    return {};
}

/// Consumes `str` if it is the next token.
///
/// A one-character `str` is compared against the next character only; a
/// longer `str` must equal the whole word returned by peekWord(END).
///
/// @tparam expect when true, a mismatch records an error via setError().
/// @return true when the token was matched and consumed, false otherwise
///         (the token itself is not consumed in that case).
template <bool expect> bool Parser::token(std::string_view str)
{
    bool matched = false;

    if (nextSymbol() == SymbolType::token) {
        if (str.length() == 1) {
            matched = peekChar(0) == str[0];
            if (matched)
                readChar();
        } else {
            const std::string word = peekWord(END);
            matched = word == str;
            if (matched)
                readString(word.length());
        }
    }

    if constexpr (expect) {
        if (!matched)
            setError(std::format("expect token {}", str));
    }
    return matched;
}

// Explicit instantiations: the template is defined in this file, so both
// variants are emitted here.
template bool Parser::token<false>(std::string_view str);
template bool Parser::token<true>(std::string_view str);

/// Appends `str` to `vec` as a string word spanning `begin` to the current
/// position, then clears `str`. Does nothing when `str` is empty.
void Parser::pushWordString(std::vector<std::unique_ptr<ast::Word>> &vec,
                            std::string &str, ast::Position begin)
{
    if (str.empty())
        return;

    vec.push_back(std::make_unique<ast::Word::String>(
        str, false, ast::Range(begin, current_position)));
    str.clear();
}

/// Length in characters of the operator `op`: 3 for
/// SymbolType::double_less_dash, 2 for every other value.
std::size_t parsing::operator_len(SymbolType op)
{
    return op == SymbolType::double_less_dash ? 3 : 2;
}

/// Tells whether `ch` is the first character of any entry in `operators`.
bool parsing::is_operator_start(char ch)
{
    for (const auto &op : operators) {
        if (op.second[0] == ch)
            return true;
    }
    return false;
}