1#include "Ast.hpp"2#include "Parser.hpp"3#include "logfmtxx.hpp"4#include <cassert>5#include <catch2/catch_message.hpp>6#include <cctype>7#include <cstddef>8#include <iostream>9#include <memory>10#include <sstream>1112using namespace parsing;1314static logfmtxx::logger logger{15 [](const std::string &message) { std::cerr << message << std::endl; },16 logfmtxx::field{"component", "program"},17};1819std::unique_ptr<ast::Program> Parser::expectProgram()20{21 linebreak();22 if (peekChar(0) == END)23 return std::make_unique<ast::Program>(24 std::vector<std::unique_ptr<ast::CommandList>>{});25 auto body = completeCommand<true>();2627 if (body.size() == 0) {28 return nullptr;29 }3031 while (linebreak() != 0) {32 if (BasicParser::nextSymbol() == SymbolType::end)33 break;34 auto nbody = completeCommand<false>();35 if (nbody.size() == 0)36 break;3738 body.insert(body.end(), std::make_move_iterator(nbody.begin()),39 std::make_move_iterator(nbody.end()));40 }4142 return std::make_unique<ast::Program>(std::move(body));43}4445template <bool expect>46std::vector<std::unique_ptr<ast::CommandList>> Parser::completeCommand()47{48 auto list = commandList<false>();4950 if (list == nullptr) {51 if (expect)52 setError("expected a complete command");53 return {};54 }5556 std::vector<std::unique_ptr<ast::CommandList>> cmds{};57 cmds.push_back(std::move(list));5859 while (true) {60 list = commandList<false>();61 if (list == nullptr)62 break;63 cmds.push_back(std::move(list));64 }6566 if (here_documents.size() != 0) {67 for (auto i : here_documents) {68 if (nextSymbol() != SymbolType::newline) {69 setError("expect a newline followed by a "70 "here-document");71 return {};72 }73 readChar();7475 if (!completeHeredoc(i)) {76 return {};77 }78 }79 here_documents.clear();80 }8182 return cmds;83}8485template <bool with_heredoc>86std::unique_ptr<ast::CommandList> Parser::commandList()87{88 auto and_or = andOrList();89 if (and_or == nullptr) {90 return nullptr;91 }9293 auto sep = separatorOp();94 ast::Position sep_pos{};95 if (sep.has_value())96 sep_pos = current_position;9798 if (!with_heredoc) {99 // always return in this branch100101 if (sep.has_value() && sep.value() == '&') {102 return std::make_unique<ast::CommandList>(103 std::move(and_or), true, sep_pos);104 } else {105 return std::make_unique<ast::CommandList>(106 std::move(and_or), false, sep_pos);107 }108 } else {109 if (!sep.has_value())110 linebreak();111 else if (linebreak() != 0)112 sep = '\n';113 }114115 // with_heredoc is ture116 std::unique_ptr<ast::CommandList> cmd;117 if (sep == '&')118 cmd = std::make_unique<ast::CommandList>(std::move(and_or),119 true, sep_pos);120 else121 cmd = std::make_unique<ast::CommandList>(std::move(and_or),122 false, sep_pos);123124 if (sep == '\n' && here_documents.size() != 0) {125 for (auto i : here_documents) {126 if (!completeHeredoc(i)) {127 return nullptr;128 }129 }130 // IORedirect also stored in SimpleCommand.io_redirects131 // It's fine to been clear.132 here_documents.clear();133 }134 return cmd;135}136137std::unique_ptr<ast::AndOrList> Parser::andOrList()138{139 auto pl = pipeline();140141 if (pl == nullptr)142 return nullptr;143144 const SymbolType next_sym = nextSymbol();145146 ast::AndOrList::BinOpType bin_op_type{};147148 ast::Range op_range;149 op_range.begin = current_position;150 if (next_sym == SymbolType::and_if) {151 bin_op_type = ast::AndOrList::BinOpType::op_and;152 } else if (next_sym == SymbolType::or_if) {153 bin_op_type = ast::AndOrList::BinOpType::op_or;154 } else {155 return pl;156 }157 readString(operator_len(next_sym));158 op_range.end = current_position;159160 auto and_or = andOrList();161 if (and_or == nullptr) {162 setError("expected an AND-OR list");163 return nullptr;164 }165166 return std::make_unique<ast::AndOrList::BinOp>(167 bin_op_type, std::move(pl), std::move(and_or), op_range);168}169170std::unique_ptr<ast::AndOrList::Pipeline> Parser::pipeline()171{172 bool bang{false};173 ast::Position bang_pos{};174 if (peekChar(0) == '!') {175 bang = true;176 bang_pos = current_position;177 readChar();178 }179180 auto cmd = command();181 if (cmd == nullptr) {182 return nullptr;183 }184185 std::vector<std::unique_ptr<ast::Command>> cmds{};186 cmds.push_back(std::move(cmd));187188 while (peekChar(0) == '|') {189 readChar();190 linebreak();191 cmd = command();192 if (cmd == nullptr) {193 setError("expected a command");194 return nullptr;195 }196 cmds.push_back(std::move(cmd));197 }198 return std::make_unique<ast::AndOrList::Pipeline>(std::move(cmds), bang,199 bang_pos);200}201202std::unique_ptr<ast::Command> Parser::command()203{204 std::unique_ptr<ast::Command> cmd = compoundCommand();205 if (cmd != nullptr) {206 return cmd;207 }208 return simpleCommand();209}210211std::shared_ptr<ast::IORedirect> Parser::ioRedirect()212{213 ast::Position io_num_pos{};214 int io_num{-1};215 if (std::isdigit(peekChar(0))) {216 const char ch = peekChar(1);217 if (ch != '>' && ch != '<') {218 return nullptr;219 }220 io_num_pos = current_position;221 io_num = readChar() - '0';222 }223224 ast::Range op_range{};225 op_range.begin = current_position;226 const auto op = ioRedirectOp();227228 if (!op.has_value()) {229 if (io_num >= 0) {230 setError("expect an io redirect operator");231 }232 return nullptr;233 }234235 std::unique_ptr<ast::Word> name{nullptr};236237 // TODO: shoud combine 2 branches?238 if (op == ast::IORedirectOp::double_less ||239 op == ast::IORedirectOp::double_less_dash) {240 // io_here241 name = word<false>(END);242 if (name == nullptr) {243 setError("expect a name after IO here-document "244 "redirction operator");245 return nullptr;246 }247 op_range.end = current_position;248 const auto redirect = std::make_shared<ast::IORedirect>(249 io_num, op.value(), std::move(name), io_num_pos, op_range);250 BasicParser::here_documents.push_back(redirect);251 return redirect;252 } else {253 // io_file254 name = word<false>(END);255 if (name == nullptr) {256 setError("expect a filename after IO file redirection "257 "operator");258 return nullptr;259 }260 op_range.end = current_position;261 return std::make_shared<ast::IORedirect>(262 io_num, op.value(), std::move(name), io_num_pos, op_range);263 }264265 return nullptr;266}267268bool Parser::completeHeredoc(std::shared_ptr<ast::IORedirect> redir)269{270 // TODO: add position info271 const auto delim = redir->name->toStr();272 if (!delim.has_value()) {273 setError("not a stringify here document delimiter");274 return false;275 }276277 const bool trim_tabs = redir->op == ast::IORedirectOp::double_less_dash;278 const bool expand_lines = !redir->name->isQuoted();279280 while (true) {281 std::string line;282 while (true) {283 const char ch = peekChar(0);284 if (ch == END || ch == '\n')285 break;286 line += readChar();287 }288 if (trim_tabs) {289 line.erase(line.begin(),290 std::ranges::find_if(291 line.begin(), line.end(),292 [](char ch) { return ch != '\t'; }));293 }294295 if (line == delim) {296 if (peekChar(0) == '\n')297 readChar();298 break;299 }300 if (expand_lines) {301 std::istringstream in(line);302 redir->here_document.push_back(303 Parser(in).hereDocLine());304 } else {305 redir->here_document.push_back(306 std::make_unique<ast::Word::String>(line, true));307 }308 }309310 return true;311}312313std::unique_ptr<ast::Word> Parser::hereDocLine()314{315 std::string str{};316 std::vector<std::unique_ptr<ast::Word>> words{};317 ast::Position child_begin{};318 while (true) {319 child_begin = current_position;320 char ch = peekChar(0);321 if (ch == END)322 break;323324 if (ch == '$') {325 pushWordString(words, str, child_begin);326327 auto w = expectDollar();328 if (w == nullptr)329 return nullptr;330 words.push_back(std::move(w));331 continue;332 } else if (ch == '`') {333 pushWordString(words, str, child_begin);334335 words.push_back(backQuotes());336 continue;337338 } else if (ch == '\\') {339 switch (peekChar(1)) {340 case '$':341 [[fallthrough]];342 case '`':343 [[fallthrough]];344 case '\\':345 readChar();346 ch = readChar();347 break;348 }349 }350 str += ch;351 readChar();352 }353354 pushWordString(words, str, child_begin);355356 if (words.size() == 1) {357 return std::move(words[0]);358 } else {359 return std::make_unique<ast::Word::List>(std::move(words),360 false);361 }362}363364std::unique_ptr<ast::Assignment> Parser::assignment()365{366 if (nextSymbol() != SymbolType::token)367 return nullptr;368 auto name = peekName(false);369 if (!name.has_value())370 return nullptr;371 if (peekChar(name.value().length()) != '=')372 return nullptr;373374 ast::Range name_range{};375 name_range.begin = current_position;376 readString(name.value().length());377 name_range.end = current_position;378379 ast::Position equal_pos{current_position};380 readChar();381382 auto value = word<false>(END);383 if (value == nullptr)384 value = std::make_unique<ast::Word::String>("", false);385386 return std::make_unique<ast::Assignment>(name.value(), std::move(value),387 name_range, equal_pos);388}389390std::unique_ptr<ast::Command> Parser::simpleCommand()391{392 auto cmd = std::make_unique<ast::Command::SimpleCommand>();393394 // 1. command prefix395 bool has_prefix{false};396 std::shared_ptr<ast::IORedirect> redir{};397 std::unique_ptr<ast::Assignment> assign{};398 while (true) {399 redir = ioRedirect();400 if (redir != nullptr) {401 cmd->io_redirects.push_back(redir);402 has_prefix = true;403 continue;404 }405 assign = assignment();406 if (assign != nullptr) {407 cmd->assignments.push_back(std::move(assign));408 has_prefix = true;409 continue;410 }411 break;412 }413414 // 2. cmd name415416 auto w = peekWord(END);417 if (w == "") {418 cmd->name = word<false>(END);419 } else if (ast::isKeyword(w)) {420 cmd->name = nullptr;421 } else {422 auto token_begin = current_position;423 auto token = readString(w.length());424 assert(token.has_value());425 cmd->name = std::make_unique<ast::Word::String>(426 token.value(), false,427 ast::Range{token_begin, current_position});428 }429430 // cmd->name = word<false>(END);431 if (cmd->name == nullptr) {432 if (!has_prefix)433 return nullptr;434 else {435 return cmd;436 }437 }438439 // 3. suffix (args + io_redirect )440 std::unique_ptr<ast::Word> arg{};441 while (true) {442 redir = ioRedirect();443 if (redir != nullptr) {444 cmd->io_redirects.push_back(std::move(redir));445 has_prefix = true;446 continue;447 }448449 arg = word<false>(0);450 if (arg != nullptr) {451 cmd->arguments.push_back(std::move(arg));452 has_prefix = true;453 continue;454 }455 break;456 }457458 return cmd;459}460461std::unique_ptr<ast::Command> Parser::compoundCommand()462{463 std::unique_ptr<ast::Command> cmd{};464 cmd = braceGroup();465 if (cmd != nullptr)466 return cmd;467 else if (error != nullptr)468 return nullptr;469470 cmd = subshell();471 if (cmd != nullptr)472 return cmd;473 else if (error != nullptr)474 return nullptr;475476 cmd = ifClause();477 if (cmd != nullptr)478 return cmd;479 else if (error != nullptr)480 return nullptr;481482 cmd = forClause();483 if (cmd != nullptr)484 return cmd;485 else if (error != nullptr)486 return nullptr;487488 cmd = loopClause();489 if (cmd != nullptr)490 return cmd;491 else if (error != nullptr)492 return nullptr;493494 cmd = caseClause();495 if (cmd != nullptr)496 return cmd;497 else if (error != nullptr)498 return nullptr;499500 cmd = functionDefine();501 if (cmd != nullptr)502 return cmd;503 else if (error != nullptr)504 return nullptr;505506 return nullptr;507};508509std::unique_ptr<ast::Command::BraceGroup> Parser::braceGroup()510{511 ast::Position lbrace_pos{current_position}, rbrace_pos{};512 if (!token<false>("{"))513 return nullptr;514 auto cmds = compoundList<true>();515 if (cmds.size() == 0)516 return nullptr;517518 rbrace_pos = current_position;519 if (!BasicParser::expect("}"))520 return nullptr;521522 return std::make_unique<ast::Command::BraceGroup>(523 std::move(cmds), lbrace_pos, rbrace_pos);524}525526std::unique_ptr<ast::Command::SubShell> Parser::subshell()527{528 ast::Position lparen_pos{current_position}, rparen_pos{};529 if (!token<false>("("))530 return nullptr;531532 auto cmds = compoundList<true>();533 if (cmds.size() == 0)534 return nullptr;535536 rparen_pos = current_position;537 if (!BasicParser::expect(")"))538 return nullptr;539540 return std::make_unique<ast::Command::SubShell>(std::move(cmds),541 lparen_pos, rparen_pos);542}543544std::unique_ptr<ast::Command::IfClause> Parser::ifClause()545{546 ast::Range if_range{}, then_range{}, fi_range{}; // else_range{};547548 if_range.begin = current_position;549 if (!token<false>("if"))550 return nullptr;551552 if_range.end = current_position;553554 std::vector<std::unique_ptr<ast::CommandList>> cond{}, then{};555 std::unique_ptr<ast::Command> ep;556 cond = compoundList<true>();557 if (cond.size() == 0)558 return nullptr;559560 then_range.begin = current_position;561 if (!BasicParser::expect("then"))562 return nullptr;563 then_range.end = current_position;564565 then = compoundList<true>();566 if (then.size() == 0)567 return nullptr;568569 // TODO: else range570 ep = elsePart();571572 fi_range.begin = current_position;573 if (!BasicParser::expect("fi"))574 return nullptr;575 fi_range.end = current_position;576577 auto if_clause = std::make_unique<ast::Command::IfClause>(578 std::move(cond), std::move(then), std::move(ep));579 if_clause->if_range = if_range;580 if_clause->then_range = then_range;581 if_clause->fi_range = fi_range;582583 return if_clause;584}585586std::unique_ptr<ast::Command> Parser::elsePart()587{588 ast::Range if_range{}, then_range{};589 if_range.begin = current_position;590 if (token<false>("elif")) {591 if_range.end = current_position;592593 std::vector<std::unique_ptr<ast::CommandList>> cond{}, then{};594 std::unique_ptr<ast::Command> ep;595596 cond = compoundList<true>();597 if (cond.size() == 0)598 return nullptr;599 then_range.begin = current_position;600601 if (!BasicParser::expect("then"))602 return nullptr;603604 then_range.end = current_position;605606 then = compoundList<true>();607 if (then.size() == 0)608 return nullptr;609610 // TODO: else range611 ep = elsePart();612 auto if_clause = std::make_unique<ast::Command::IfClause>(613 std::move(cond), std::move(then), std::move(ep));614 if_clause->if_range = if_range;615 if_clause->then_range = then_range;616617 return if_clause;618 }619620 if (token<false>("else")) {621 // TODO: position information(else_range) is missing622 auto body = compoundList<true>();623 if (body.size() == 0)624 return nullptr;625 return std::make_unique<ast::Command::BraceGroup>(626 std::move(body));627 }628 return nullptr;629}630631std::unique_ptr<ast::Command::ForClause> Parser::forClause()632{633 ast::Range for_range{current_position, current_position};634 if (!token<false>("for")) {635 return nullptr;636 }637 for_range.end = current_position;638639 auto name = peekName(false);640 if (!name.has_value()) {641 setError("expect a name");642 return nullptr;643 }644 ast::Range name_range{current_position, current_position};645 name = BasicParser::readString(name.value().length());646 assert(name.has_value());647 name_range.end = current_position;648649 ast::Range in_range{current_position, current_position};650 auto in = token<false>("in");651 in_range.end = current_position;652653 std::vector<std::unique_ptr<ast::Word>> words{};654 if (in) {655 BasicParser::readString(std::string{"in"}.length());656657 words = wordList<ast::Word>(658 [this](char end) { return word<false>(end); }, END);659 if (BasicParser::expect(";")) {660 linebreak();661 } else {662 return nullptr;663 }664 } else {665 token<false>(";");666 linebreak();667 }668669 auto [do_group, do_range, done_range] = expectDoGroup();670 if (do_group.size() == 0)671 return nullptr;672 auto cmd = std::make_unique<ast::Command::ForClause>(673 in, std::move(words), std::move(do_group));674675 cmd->for_range = for_range;676 cmd->name_range = name_range;677 cmd->in_range = in_range;678 cmd->do_range = do_range;679 cmd->done_range = done_range;680681 return cmd;682}683684std::unique_ptr<ast::Command::LoopClause> Parser::loopClause()685{686 ast::Command::LoopClauseType type{};687 ast::Range while_until_range{};688 while_until_range.begin = current_position;689 if (token<false>("while"))690 type = ast::Command::LoopClauseType::loop_while;691 else if (token<false>("until"))692 type = ast::Command::LoopClauseType::loop_until;693 else694 return nullptr;695 while_until_range.end = current_position;696697 std::vector<std::unique_ptr<ast::CommandList>> cond{};698699 cond = compoundList<true>();700 if (cond.size() == 0)701 return nullptr;702703 auto [body, do_range, done_range] = expectDoGroup();704 // TODO: save position information705706 if (body.size() == 0)707 return nullptr;708709 auto cmd = std::make_unique<ast::Command::LoopClause>(710 type, std::move(cond), std::move(body));711712 cmd->while_until_range = while_until_range;713 cmd->do_range = do_range;714 cmd->done_range = done_range;715 return cmd;716}717718std::tuple<std::vector<std::unique_ptr<ast::CommandList>>, ast::Range,719 ast::Range>720Parser::expectDoGroup()721{722 ast::Range do_range{}, done_range{};723 do_range.begin = current_position;724 if (!token<true>("do"))725 return {};726 do_range.end = current_position;727 auto body = compoundList<true>();728 if (body.size() == 0)729 return {};730731 done_range.begin = current_position;732 if (!token<true>("done"))733 return {};734 done_range.end = current_position;735736 return std::make_tuple(std::move(body), do_range, done_range);737}738739std::unique_ptr<ast::Command::CaseClause> Parser::caseClause()740{741 ast::Range case_range;742 case_range.begin = current_position;743 if (!token<false>("case"))744 return nullptr;745 case_range.end = current_position;746747 auto w = word<true>(END);748 if (w == nullptr)749 return nullptr;750 linebreak();751752 ast::Range in_range;753 in_range.begin = current_position;754 if (!token<true>("in"))755 return nullptr;756 in_range.end = current_position;757 linebreak();758759 std::vector<std::unique_ptr<ast::CaseItem>> items{};760 ast::Range esac_range{current_position, current_position};761 while (!token<false>("esac")) {762 auto i = expectCaseItem();763 if (i.first == nullptr)764 return nullptr;765 items.push_back(std::move(i.first));766 if (!i.second) {767 esac_range.begin = current_position;768 if (!token<true>("esac"))769 return nullptr;770 break;771 }772 esac_range.begin = current_position;773 }774 esac_range.end = current_position;775 auto cmd = std::make_unique<ast::Command::CaseClause>(std::move(w),776 std::move(items));777 cmd->case_range = case_range;778 cmd->in_range = in_range;779 cmd->esac_range = esac_range;780 return cmd;781}782783std::pair<std::unique_ptr<struct ast::CaseItem>, bool> Parser::expectCaseItem()784{785 bool dsemi = false;786 ast::Position lparen_pos{current_position}, rparen_pos{};787788 if (token<false>("("))789 lparen_pos = ast::Position{};790791 auto w = word<true>(END);792 if (w == nullptr)793 return {nullptr, false};794795 std::vector<std::unique_ptr<ast::Word>> patterns;796 patterns.push_back(std::move(w));797798 while (token<false>("|")) {799 auto w = word<true>(END);800 if (w == nullptr)801 return {nullptr, false};802 patterns.push_back(std::move(w));803 }804805 rparen_pos = current_position;806 if (!token<true>(")"))807 return {nullptr, false};808809 auto body = compoundList<false>();810 if (error != nullptr)811 return {nullptr, false};812813 ast::Range dsemi_range{};814 if (nextSymbol() == SymbolType::double_semi) {815 dsemi_range.begin = current_position;816 readString(operator_len(SymbolType::double_semi));817 dsemi_range.end = current_position;818 dsemi = true;819 linebreak();820 }821822 return {std::make_unique<ast::CaseItem>(std::move(patterns),823 std::move(body), lparen_pos,824 rparen_pos, dsemi_range),825 dsemi};826}827828std::unique_ptr<ast::Command::FunctionDefine> Parser::functionDefine()829{830 auto name = peekName(false);831 if (!name.has_value())832 return nullptr;833834 std::size_t i = name.value().size();835 while (true) {836 const char ch = peekChar(i);837 if (ch == '(')838 break;839 if (!std::isblank(ch))840 return nullptr;841 i += 1;842 }843 ast::Range name_range{current_position, current_position};844 name = readString(i);845 name_range.end = current_position;846847 ast::Position lparen_pos{current_position};848 if (!token<true>("("))849 return nullptr;850 ast::Position rparen_pos{current_position};851 if (!token<true>(")"))852 return nullptr;853 linebreak();854855 auto cmd = compoundCommand();856857 if (cmd == nullptr) {858 setError("expect a compound command");859 return nullptr;860 }861862 auto func_def = std::make_unique<ast::Command::FunctionDefine>(863 name.value(), std::move(cmd));864 while (true) {865 auto redir = ioRedirect();866 if (redir == nullptr)867 break;868 func_def->io_redirects.push_back(redir);869 }870 func_def->name_range = name_range;871 func_def->lparen_pos = lparen_pos;872 func_def->rparen_pos = rparen_pos;873874 return func_def;875}876877template <bool expect>878std::vector<std::unique_ptr<ast::CommandList>> Parser::compoundList()879{880 std::vector<std::unique_ptr<ast::CommandList>> cmds{};881882 linebreak();883 auto l = commandList<true>();884 if (l == nullptr) {885 if (expect)886 setError("expect a compound list");887 return {};888 }889 cmds.push_back(std::move(l));890 while (true) {891 l = commandList<true>();892 if (l == nullptr)893 break;894 cmds.push_back(std::move(l));895 }896 return cmds;897};