mush

git clone git://git.lin.moe/mush.git

  1#include "Ast.hpp"
  2#include "Parser.hpp"
  3#include <cassert>
  4#include <cctype>
  5#include <memory>
  6#include <sstream>
  7
  8using namespace parsing;
  9
 10std::unique_ptr<ast::Word::String> Parser::singleQuotes()
 11{
 12	std::string str{};
 13	auto begin = current_position;
 14
 15	char c = BasicParser::readChar();
 16	assert(c == '\'');
 17
 18	while (true) {
 19		c = BasicParser::peekChar(0);
 20		if (c == '\0') {
 21			setError("single quotes not terminated");
 22			return nullptr;
 23		}
 24		if (c == '\'') {
 25			BasicParser::readChar();
 26			break;
 27		}
 28		if (c == '\n') {
 29			continue_line = true;
 30		}
 31
 32		BasicParser::readChar();
 33		str += c;
 34	}
 35
 36	return std::make_unique<ast::Word::String>(
 37	    str, false, ast::Range{begin, current_position});
 38}
 39
 40std::unique_ptr<ast::Word::List> Parser::doubleQuotes()
 41{
 42
 43	assert(readChar() == '"');
 44
 45	std::string buf{};
 46	ast::Position child_begin{};
 47	std::vector<std::unique_ptr<ast::Word>> children{};
 48
 49	while (true) {
 50		child_begin = current_position;
 51		char c = peekChar(0);
 52		if (c == END) {
 53			setError("double quotes not terminated");
 54			return nullptr;
 55		}
 56		if (c == '"') {
 57			pushWordString(children, buf, child_begin);
 58			readChar();
 59			break;
 60		}
 61		if (c == '$') {
 62			pushWordString(children, buf, child_begin);
 63			auto t = expectDollar();
 64			if (t == nullptr)
 65				return nullptr;
 66			children.push_back(std::move(t));
 67			continue;
 68		}
 69		if (c == '`') {
 70			auto t = backQuotes();
 71			if (t == nullptr)
 72				return nullptr;
 73			children.push_back(std::move(t));
 74			continue;
 75		}
 76
 77		if (c == '\\') {
 78			c = peekChar(1);
 79			switch (c) {
 80			case '$':
 81				[[fallthrough]];
 82			case '`':
 83				[[fallthrough]];
 84			case '"':
 85				[[fallthrough]];
 86			case '\\':
 87				readChar();
 88				break;
 89			}
 90			if (c == '\n') {
 91				readChar();
 92				readChar();
 93				continue_line = true;
 94				continue;
 95			}
 96		}
 97
 98		readChar();
 99		buf += c;
100	}
101
102	// TODO: handle position
103	return std::make_unique<ast::Word::List>(std::move(children), true);
104}
105
106std::unique_ptr<ast::Word::Command> Parser::backQuotes()
107{
108	ast::Position begin = current_position;
109
110	char c = readChar();
111	assert(c == '`');
112	std::string buf{};
113	while (true) {
114		c = peekChar(0);
115		if (c == END) {
116			setError("back quotes not terminated");
117			return nullptr;
118		}
119		if (c == '`') {
120			readChar();
121			break;
122		}
123		if (c == '\\') {
124			switch (peekChar(1)) {
125			case '$':
126				[[fallthrough]];
127			case '`':
128				[[fallthrough]];
129			case '\\':
130				readChar();
131				c = peekChar(0);
132				break;
133			}
134		}
135		if (c == '\n') {
136			continue_line = true;
137		}
138		readChar();
139		buf += c;
140	}
141
142	std::istringstream in(buf);
143	auto subparser = Parser{in};
144
145	auto prog = subparser.expectProgram();
146	if (subparser.error != nullptr) {
147		setError(subparser.error->what());
148		return nullptr;
149	}
150
151	return std::make_unique<ast::Word::Command>(
152	    std::move(prog), true, ast::Range{begin, current_position});
153}
154
155std::unique_ptr<ast::Word> Parser::expectDollar()
156{
157	ast::Position dollar_pos = current_position;
158	char c = BasicParser::readChar();
159	assert(c == '$');
160
161	c = BasicParser::peekChar(0);
162	std::unique_ptr<ast::Word::Parameter> param{nullptr};
163
164	switch (c) {
165	case '{': // Parameter expansion in the form `${expression}`
166		param = expectParameter();
167		if (param == nullptr)
168			return nullptr;
169		param->dollar_pos = dollar_pos;
170		return param;
171	case '(':
172		if (BasicParser::peekChar(1) == '(') {
173			return expectArithmetic();
174		} else {
175			return expectWordCommand();
176		}
177	default:
178		auto name = peekName(false);
179
180		if (!name.has_value()) {
181			bool ok = false;
182			switch (c) {
183			case '@':
184			case '*':
185			case '#':
186			case '?':
187			case '-':
188			case '$':
189			case '!':
190				ok = true;
191				break;
192			default:
193				ok = isdigit(c);
194			}
195			if (ok) {
196				name = std::string(1, c);
197			} else {
198				setError("invalid parameter name");
199				return nullptr;
200			}
201		}
202
203		assert(name.has_value());
204		ast::Position name_begin = current_position;
205		name = BasicParser::readString(name.value().length());
206
207		auto param = std::make_unique<ast::Word::Parameter>(
208		    name.value(), ast::Word::ParameterOp::none,
209		    std::unique_ptr<ast::Word>{});
210		param->name_range = ast::Range{name_begin, current_position};
211		return param;
212	}
213}
214
215std::unique_ptr<ast::Word::Command> Parser::expectWordCommand()
216{
217	char c = BasicParser::readChar();
218	assert(c == '(');
219
220	auto prog = expectProgram();
221	if (prog == nullptr) {
222		return nullptr;
223	}
224
225	if (!expect(")")) {
226		return nullptr;
227	}
228	return std::make_unique<ast::Word::Command>(std::move(prog), false);
229}
230
231std::unique_ptr<ast::Word::Arithmetic> Parser::expectArithmetic()
232{
233	char c = readChar();
234	assert(c == '(');
235	c = readChar();
236	assert(c == '(');
237
238	arith_nested_parens = 0;
239
240	auto body = wordList<ast::Word>(
241	    [this](char end) { return arithmeticWord(end); }, '\0');
242
243	auto arithm = std::make_unique<ast::Word::Arithmetic>(
244	    std::make_unique<ast::Word::List>(std::move(body), false));
245
246	c = readChar();
247	if (c != ')') {
248		setError("excepted )");
249		return nullptr;
250	}
251	c = readChar();
252	if (c != ')') {
253		setError("excepted )");
254		return nullptr;
255	}
256
257	return arithm;
258}
259
260std::optional<ast::Word::ParameterOp> Parser::expectParameterOp()
261{
262	char ch = peekChar(0);
263	bool coloned{false};
264
265	if (ch == ':') {
266		coloned = true;
267		readChar();
268		ch = peekChar(0);
269	}
270
271	switch (ch) {
272	case '-':
273		readChar();
274		if (coloned)
275			return ast::Word::ParameterOp::coloned_minus;
276		else
277			return ast::Word::ParameterOp::minus;
278	case '=':
279		readChar();
280		if (coloned)
281			return ast::Word::ParameterOp::coloned_equal;
282		else
283			return ast::Word::ParameterOp::equal;
284	case '?':
285		readChar();
286		if (coloned)
287			return ast::Word::ParameterOp::coloned_qmark;
288		else
289			return ast::Word::ParameterOp::qmark;
290	case '+':
291		readChar();
292		if (coloned)
293			return ast::Word::ParameterOp::coloned_plus;
294		else
295			return ast::Word::ParameterOp::plus;
296	}
297	if (coloned) {
298		setError("expect a parameter operation");
299		return {};
300	}
301
302	switch (ch) {
303	case '%':
304		readChar();
305		if (peekChar(0) == ch) {
306			readChar();
307			return ast::Word::ParameterOp::double_percent;
308		} else {
309			return ast::Word::ParameterOp::percent;
310		}
311	case '#':
312		readChar();
313		if (peekChar(0) == ch) {
314			readChar();
315			return ast::Word::ParameterOp::double_hash;
316		} else {
317			return ast::Word::ParameterOp::hash;
318		}
319	default:
320		setError("expect a parameter operation");
321		return {};
322	}
323}
324
325std::unique_ptr<ast::Word::Parameter> Parser::expectParameter()
326{
327
328	assert(readChar() == '{');
329	ast::Position lbrace_pos = current_position;
330
331	auto op = ast::Word::ParameterOp::none;
332	ast::Range op_range{};
333
334	if (peekChar(0) == '#') {
335		op_range.begin = current_position;
336		readChar();
337		op_range.end = current_position;
338		op = ast::Word::ParameterOp::leading_hash;
339	}
340
341	std::unique_ptr<ast::Word> args{nullptr};
342
343	ast::Range name_range;
344	auto name = peekName(true);
345	if (!name.has_value()) {
346		setError("expected a parameter");
347		return nullptr;
348	}
349	name_range.begin = current_position;
350	readString(name.value().length());
351	name_range.end = current_position;
352
353	if (op == ast::Word::ParameterOp::none && peekChar(0) != '}') {
354		op_range.begin = current_position;
355		auto i = expectParameterOp();
356		if (!i.has_value())
357			return nullptr;
358		op_range.end = current_position;
359		op = i.value();
360
361		auto arglist = wordList<ast::Word>(
362		    [this](char end) { return word<false>(end); }, '}');
363		if (arglist.size() == 1) {
364			args = std::move(arglist[0]);
365		} else if (arglist.size() > 1) {
366			args = std::make_unique<ast::Word::List>(
367			    std::move(arglist), false);
368		}
369	}
370
371	ast::Position rbrace_pos = current_position;
372	if (readChar() != '}') {
373		setError("expected end of parameter");
374		return nullptr;
375	}
376
377	auto param = std::make_unique<ast::Word::Parameter>(name.value(), op,
378							    std::move(args));
379
380	param->name_range = name_range;
381	param->op_range = op_range;
382	param->lbrace_pos = lbrace_pos;
383	param->rbrace_pos = rbrace_pos;
384
385	return param;
386}
387
388template <typename T>
389std::vector<std::unique_ptr<T>>
390Parser::wordList(std::function<std::unique_ptr<T>(char)> func, const char end)
391{
392	std::vector<std::unique_ptr<T>> vec{};
393
394	while (true) {
395		if (peekChar(0) == end)
396			break;
397		auto el = func(end);
398		if (el == nullptr)
399			break;
400		vec.push_back(std::move(el));
401
402		std::string str{};
403		auto begin = current_position;
404		while (true) {
405			if (!isblank(peekChar(0)))
406				break;
407			str += readChar();
408		}
409		if (str.length() != 0) {
410			vec.push_back(std::make_unique<ast::Word::String>(
411			    str, false, ast::Range(begin, current_position)));
412		}
413	}
414
415	return vec;
416}
417
418template std::unique_ptr<ast::Word> Parser::word<true>(char end);
419template std::unique_ptr<ast::Word> Parser::word<false>(char end);
420template <bool expect> std::unique_ptr<ast::Word> Parser::word(char end)
421{
422	if (nextSymbol() != SymbolType::token) {
423		if (expect)
424			setError("expect a word");
425		return nullptr;
426	}
427
428	auto ch = peekChar(0);
429	if (ch == end || ch == ')' ||
430	    ch == ';') // ')' is for parse dolloar command
431		return nullptr;
432	if (is_operator_start(ch))
433		return nullptr;
434
435	ast::Position child_begin{};
436	std::vector<std::unique_ptr<ast::Word>> children{};
437	std::string buf{};
438	while (true) {
439		child_begin = current_position;
440		char c = peekChar(0);
441
442		if (c == END || c == '\n' || c == ')' || c == end) {
443			break;
444		}
445		if (c == '$') {
446			pushWordString(children, buf, child_begin);
447			auto t = expectDollar();
448			if (t == nullptr)
449				return nullptr;
450			children.push_back(std::move(t));
451			continue;
452		} else if (c == '`') {
453			pushWordString(children, buf, child_begin);
454			auto t = backQuotes();
455			if (t == nullptr)
456				return nullptr;
457			children.push_back(std::move(t));
458			continue;
459		} else if (c == '\'') {
460			pushWordString(children, buf, child_begin);
461			auto t = singleQuotes();
462			if (t == nullptr)
463				return nullptr;
464			children.push_back(std::move(t));
465			continue;
466		} else if (c == '"') {
467			pushWordString(children, buf, child_begin);
468			auto t = doubleQuotes();
469			if (t == nullptr)
470				return nullptr;
471			children.push_back(std::move(t));
472			continue;
473		} else if (c == '\\') {
474			readChar();
475			c = peekChar(0);
476			if (c == '\n') {
477				readChar();
478				continue_line = true;
479				continue;
480			}
481		} else if (std::isblank(c) || is_operator_start(c)) {
482			break;
483		}
484		buf += readChar();
485	}
486	pushWordString(children, buf, child_begin);
487	if (children.size() == 0) {
488		if (expect)
489			setError("expect a word");
490		return nullptr;
491	} else if (children.size() == 1)
492		return std::move(children[0]);
493	else
494		return std::make_unique<ast::Word::List>(std::move(children),
495							 false);
496}
497
498std::unique_ptr<ast::Word> Parser::arithmeticWord(char end)
499{
500	if (peekChar(0) == ')') {
501		if (peekChar(1) == ')' && arith_nested_parens == 0)
502			return nullptr;
503	}
504	std::vector<std::unique_ptr<ast::Word>> vec{};
505
506	std::string str{};
507	auto begin = current_position;
508	while (true) {
509		begin = current_position;
510		auto c = peekChar(0);
511		if ((c == '\0' || c == '\n' || c == ';' || std::isblank(c) ||
512		     c == end) &&
513		    arith_nested_parens == 0) {
514			break;
515		}
516		if (c == ')' && peekChar(1) == ')' &&
517		    arith_nested_parens == 0) {
518			break;
519		}
520		if (c == '$') {
521			pushWordString(vec, str, begin);
522			auto t = expectDollar();
523			if (t == nullptr)
524				return nullptr;
525			vec.push_back(std::move(t));
526		}
527
528		if (c == '`') {
529			pushWordString(vec, str, begin);
530			auto t = backQuotes();
531			if (t == nullptr)
532				return nullptr;
533			vec.push_back(std::move(t));
534		}
535		if (c == '\'') {
536			pushWordString(vec, str, begin);
537			auto t = singleQuotes();
538			if (t == nullptr)
539				return nullptr;
540			vec.push_back(std::move(t));
541		}
542		if (c == '"') {
543			pushWordString(vec, str, begin);
544			auto t = doubleQuotes();
545			if (t == nullptr)
546				return nullptr;
547			vec.push_back(std::move(t));
548		}
549
550		if (c == '\\') {
551			readChar();
552			c = peekChar(0);
553			if (c == '\n') {
554				// read_continuation_line(parser);
555				continue;
556			}
557		}
558		if ((c == '<' && peekChar(1) == '<') ||
559		    (c == '>' && peekChar(1) == '>'))
560			str += readChar();
561		if (c == '(') {
562			arith_nested_parens += 1;
563		} else if (c == ')') {
564			if (arith_nested_parens == 0) {
565				setError("unmatched closing parenthesis in "
566					 "arithmetic expression");
567				return nullptr;
568			}
569			arith_nested_parens -= 1;
570		}
571		readChar();
572		str += c;
573	}
574
575	pushWordString(vec, str, begin);
576
577	if (vec.size() == 1) {
578		return std::move(vec[0]);
579	} else {
580		return std::make_unique<ast::Word::List>(std::move(vec), false);
581	}
582}