mush

git clone git://git.lin.moe/mush.git

  1#include "Parser.hpp"
  2#include "Ast.hpp"
  3#include <algorithm>
  4#include <cassert>
  5#include <cctype>
  6#include <cstddef>
  7#include <format>
  8using namespace parsing;
  9
 10ParseError::ParseError(ast::Position position, std::string message)
 11    : std::logic_error{message}, position(position)
 12{
 13}
 14
 15BasicParser::BasicParser(std::istream &stream)
 16    : in(stream), buffer{}, continue_line{}, error{}
 17{
 18}
 19
 20Parser::Parser(std::istream &stream)
 21    : BasicParser{stream}, arith_nested_parens{0}
 22{
 23}
 24
 25char BasicParser::peekChar(const std::size_t index)
 26{
 27	const std::size_t old_size = buffer.size();
 28	if (index < old_size) {
 29		return buffer[buffer.size() - index - 1];
 30	}
 31
 32	const std::size_t needed_chars = index + 1;
 33	const std::size_t chars_to_read = needed_chars - old_size;
 34
 35	buffer.resize(needed_chars);
 36
 37	if (old_size > 0)
 38		std::copy_backward(buffer.begin(), buffer.begin() + old_size,
 39				   buffer.end());
 40
 41	char ch{};
 42	for (std::size_t i = 0; i < chars_to_read; ++i) {
 43		if (!in.get(ch)) {
 44			buffer.erase(buffer.begin(),
 45				     buffer.begin() + (chars_to_read - i));
 46			// buffer.resize(old_size + i);
 47			return END;
 48		}
 49		buffer[chars_to_read - 1 - i] = ch;
 50	}
 51
 52	return *(buffer.end() - index - 1);
 53}
 54
 55char BasicParser::readChar(void)
 56{
 57	continue_line = false;
 58	char ch{};
 59	if (buffer.size() == 0)
 60		peekChar(0);
 61	if (buffer.size() == 0)
 62		return (END);
 63
 64	ch = buffer.back();
 65	buffer.pop_back();
 66	if (ch == '\n')
 67		current_position.addLine();
 68	else
 69		current_position.addCol();
 70
 71	return (ch);
 72}
 73
 74std::optional<std::string> BasicParser::readString(std::size_t len)
 75{
 76	continue_line = false;
 77	peekChar(len - 1);
 78	if (buffer.size() < len)
 79		return {};
 80
 81	std::string str{buffer.end() - len, buffer.end()};
 82	buffer.resize(buffer.size() - len);
 83	std::reverse(str.begin(), str.end());
 84
 85	for (const char &ch : str) {
 86		if (ch == '\n')
 87			current_position.addLine();
 88		else
 89			current_position.addCol();
 90	}
 91	return str;
 92}
 93
 94std::string BasicParser::peekWord(char end)
 95{
 96	std::string str{};
 97
 98	for (std::size_t i = 0; true; i += 1) {
 99		const char ch = peekChar(i);
100		switch (ch) {
101		case '\0':
102		case '\n':
103		case ')':
104			return str;
105		case '$':
106		case '`':
107		case '\'':
108		case '"':
109		case '\\': // TODO: allow backslash in words
110			return {};
111		}
112		if (std::isblank(ch) || ch == end) {
113			return str;
114		}
115		if (is_operator_start(ch))
116			return str;
117		str += ch;
118	}
119}
120
121// TODO: optimize
122SymbolType BasicParser::nextSymbol(void)
123{
124	const char ch = peekChar(0);
125	if (ch == END)
126		return SymbolType::end;
127	if (ch == '\n')
128		return SymbolType::newline;
129
130	if (std::isblank(ch)) {
131		readChar();
132		// somewhere call this method to comsume blank
133		// TODO: may not call self here
134		return nextSymbol();
135	}
136	if (ch == '#') {
137		while (true) {
138			const char c = peekChar(1);
139			if (c == END || c == '\n')
140				break;
141			readChar();
142		}
143		return nextSymbol();
144	}
145
146	for (const auto &i : operators) {
147		const SymbolType type{i.first};
148		const std::string_view oper{i.second};
149
150		for (std::size_t j = 0; j < oper.length(); j++) {
151			const char ch = peekChar(j);
152			if (oper[j] != ch || ch == END)
153				goto next_operator;
154		}
155		return type;
156	next_operator:;
157	}
158	return SymbolType::token;
159}
160
/// Consumes `str` if the upcoming input matches it exactly.
///
/// @param str text that must come next in the input.
/// @return true when `str` was matched and consumed; false otherwise, in
///         which case nothing is consumed and the parser error is set to
///         "expect string <str>".
bool BasicParser::expect(const std::string str)
{
	std::size_t offset = 0;
	for (const char wanted : str) {
		if (peekChar(offset) != wanted) {
			setError(std::format("expect string {}", str));
			return false;
		}
		++offset;
	}

	readString(str.length());
	return true;
}
172void BasicParser::setError(const std::string msg)
173{
174	error = std::make_shared<ParseError>(current_position, msg);
175}
176void BasicParser::clearError() { error = nullptr; }
177
178std::optional<std::string> Parser::peekName(bool in_braces)
179{
180	if (BasicParser::nextSymbol() != SymbolType::token)
181		return {};
182
183	std::string str{};
184	std::size_t i{0};
185
186	while (true) {
187		char c = peekChar(i);
188		if (c != '_' && !isalnum(c)) {
189			break;
190		} else if (i == 0 && isdigit(c) && !in_braces) {
191			break;
192		}
193		str += c;
194		i++;
195	}
196	if (str.size() == 0)
197		return {};
198	return str;
199}
200std::optional<ast::IORedirectOp> Parser::ioRedirectOp()
201{
202	if (token<false>("<")) {
203		return ast::IORedirectOp::less;
204	} else if (token<false>(">")) {
205		return ast::IORedirectOp::greate;
206	}
207
208	const SymbolType next_sym = nextSymbol();
209	ast::IORedirectOp op{};
210	switch (next_sym) {
211	case SymbolType::less_and:
212		op = ast::IORedirectOp::less_and;
213		break;
214	case SymbolType::great_and:
215		op = ast::IORedirectOp::great_and;
216		break;
217	case SymbolType::double_great:
218		op = ast::IORedirectOp::double_great;
219		break;
220	case SymbolType::clobber:
221		op = ast::IORedirectOp::clobber;
222		break;
223	case SymbolType::less_great:
224		op = ast::IORedirectOp::less_great;
225		break;
226	case SymbolType::double_less:
227		op = ast::IORedirectOp::double_less;
228		break;
229	case SymbolType::double_less_dash:
230		op = ast::IORedirectOp::double_less_dash;
231		break;
232	default:
233		return {};
234	}
235
236	readString(operator_len(next_sym));
237	return op;
238}
239int Parser::linebreak()
240{
241	int count{0};
242	char ch{};
243	while (BasicParser::nextSymbol() == SymbolType::newline) {
244		count += 1;
245		ch = readChar();
246		assert(ch == '\n');
247	}
248	return count;
249}
250
251std::optional<char> Parser::separatorOp()
252{
253	// NOTICE: importance of nextSymbol
254	if (BasicParser::nextSymbol() != SymbolType::token) {
255		return {};
256	}
257	const char ch = peekChar(0);
258	if (ch == '&' || ch == ';')
259		return readChar();
260	return {};
261}
262
template bool Parser::token<false>(std::string_view str);
template bool Parser::token<true>(std::string_view str);
/// Consumes the literal token `str` if it comes next.
///
/// A one-character `str` is compared against the next character only; a
/// longer `str` must equal the whole word returned by peekWord(END).
///
/// @tparam expect when true, a mismatch also records the parser error
///                "expect token <str>"; when false it fails silently.
/// @param str the token text to match.
/// @return true if the token was matched and consumed, false otherwise
///         (nothing consumed beyond what nextSymbol() skips).
template <bool expect> bool Parser::token(std::string_view str)
{
	// Shared failure path: optionally record the error, report mismatch.
	const auto mismatch = [&]() {
		if constexpr (expect)
			setError(std::format("expect token {}", str));
		return false;
	};

	if (nextSymbol() != SymbolType::token)
		return mismatch();

	if (str.length() == 1) {
		if (peekChar(0) != str[0])
			return mismatch();
		readChar();
		return true;
	}

	const std::string word = peekWord(END);
	if (word != str)
		return mismatch();
	readString(word.length());
	return true;
}
292
293void Parser::pushWordString(std::vector<std::unique_ptr<ast::Word>> &vec,
294			    std::string &str, ast::Position begin)
295{
296	if (str.length() != 0) {
297		vec.push_back(std::make_unique<ast::Word::String>(
298		    str, false, ast::Range(begin, current_position)));
299		str = "";
300	}
301}
302
303std::size_t parsing::operator_len(SymbolType op)
304{
305	if (op == SymbolType::double_less_dash)
306		return 3;
307	else
308		return 2;
309}
310
311bool parsing::is_operator_start(char ch)
312{
313	for (const auto &op : operators) {
314		if (op.second[0] == ch)
315			return true;
316	}
317	return false;
318}