mush

git clone git://git.lin.moe/mush.git

  1#pragma once
#include <cstddef>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <string_view>
#include <vector>
  7
  8namespace ast
  9{
/**
 * The set of keyword spellings known to the AST layer.
 *
 * Declared `inline` so that every translation unit including this header
 * refers to one shared set instead of constructing its own private copy.
 */
inline const std::set<std::string_view> keywords{
    "if",   "then",  "else",  "elif", "fi", "do", "done", "case",
    "esac", "while", "until", "for",  "{",  "}",  "!",	  "in",
};

/**
 * Defined outside this header.
 * NOTE(review): presumably reports whether the argument is a member of
 * `keywords` -- confirm against the definition.
 */
bool isKeyword(std::string_view);
 15
/**
 * A single point in the input, tracked as an offset together with a
 * line/column pair.
 *
 * The constructor and both mutators are only declared here; their
 * definitions live outside this header.
 */
struct Position {
	std::size_t offset;
	int line;
	int column;

	Position();

	// NOTE(review): presumably advance this position by one column and by
	// one line respectively -- confirm against the definitions.
	void addCol();
	void addLine();
};
 24
 25struct Range {
 26	Position begin, end;
 27};
 28
 29/*
 30Node
 31 + Word
 32   - String
 33   - Parameter
 34   - Command
 35   - Arithmetic
 36   - List
 37 + Command
 38   - SimpleCommand
 39
 40   - BraceGroup
   - SubShell
 42
 43   - IfClause
 44   - ForClause
 45   - LoopClause
 46   - CaseClause
 47   - FunctionDefine
 48 + AndOrList
 49   - Pipeline
 50   - BinOp
 51*/
 52
/** Discriminator carried by every `Node` (see `Node::type`). */
enum class NodeType {
	program,
	command_list,
	and_or_list,
	command,
	word,
};
// Forward declarations of types that are referenced before their definitions.
class Assignment;
class CommandList;
class IORedirect;
class Program;
 64
 65class Node
 66{
 67      public:
 68	const NodeType type;
 69};
 70
/** Discriminator for the nested `Word` subclasses (see `Word::type`). */
enum class WordType {
	string,
	parameter,
	command,
	arithmetic,
	list,
};
 78
 79class Word : public Node
 80{
 81      public:
 82	Word(WordType type);
 83	virtual ~Word() = default;
 84	const WordType type;
 85
 86	class String;
 87	enum class ParameterOp;
 88	class Parameter;
 89	class Command;
 90	class Arithmetic;
 91	class List;
 92
 93	std::optional<std::string> toStr();
 94	bool isQuoted();
 95};
 96
 97class Word::String : public Word
 98{
 99      public:
100	String(std::string str, bool single_quoted);
101	String(std::string str, bool single_quoted, Range range);
102
103	std::string str;
104	bool single_quoted;
105	Range range;
106};
107
108enum class Word::ParameterOp {
109	none,	       // `$name` or `${parameter}`  no-op
110	minus,	       // `${parameter-[word]}`
111	coloned_minus, // `${parameter:-[word]}` Use Default Values
112	equal,	       // `${parameter=[word]}`
113	coloned_equal, // `${parameter:=[word]}` Assign Default Values
114	qmark,	       // `${parameter?[word]}`
115	coloned_qmark, // `${parameter:?[word]}` Indicate Error if Null or Unset
116	plus,	       // ${parameter+[word]}
117	coloned_plus,  // `${parameter:+[word]}` Use Alternative Value
118
119	leading_hash,	// `${#parameter}`        String Length
120	percent,	// `${parameter%[word]}`  Remove Smallest Suffix Pattern
121	double_percent, // `${parameter%%[word]}` Remove Largest Suffix Pattern
122	hash,		// `${parameter#[word]}`  Remove Smallest Prefix Pattern
123	double_hash,	// `${parameter##[word]}` Remove Largest Prefix Pattern
124};
125
126class Word::Parameter : public Word
127{
128      public:
129	Parameter(const std::string name, Word::ParameterOp op,
130		  std::unique_ptr<Word> args);
131	std::string name;
132	Word::ParameterOp op;
133	std::unique_ptr<Word>
134	    args; // coudle be nullptr, single word or wordlist
135
136	Position dollar_pos;
137	Range name_range;
138	Range op_range;
139	Position lbrace_pos, rbrace_pos;
140};
141
142class Word::Command : public Word
143{
144      public:
145	Command(std::unique_ptr<Program> program, bool back_quoted);
146	Command(std::unique_ptr<Program> program, bool back_quoted,
147		Range range);
148	std::unique_ptr<Program> program;
149	bool back_quoted;
150
151	Range range;
152};
153
154class Word::Arithmetic : public Word
155{
156      public:
157	Arithmetic(std::unique_ptr<Word> body);
158	std::unique_ptr<Word> body; // cloud be empty
159};
160
161/**
162 * A word list is a type of word. It can be unquoted or double-quoted. Its
163 * children are _not_ separated by blanks. Here's an example:
164 *
165 *   abc"d ef"g'h i'
166 */
167
168class Word::List : public Word
169{
170      public:
171	List(std::vector<std::unique_ptr<Word>> &&children, bool double_quoted);
172	std::vector<std::unique_ptr<Word>> children;
173	bool double_quoted;
174};
175
/** Discriminator for the nested `Command` subclasses (see `Command::type`). */
enum class CommandType {
	simple_command,
	brace_group,
	subshell,
	if_clause,
	for_clause,
	// `while` or `until`
	loop_clause,
	// NOTE(review): spelled `case_clase` (not `case_clause`); left as is
	// because callers refer to this name.
	case_clase,
	function_define,
};
186
187class Command : public Node
188{
189      public:
190	Command(CommandType type);
191	const CommandType type;
192
193	class SimpleCommand;
194	class BraceGroup;
195	class SubShell;
196	class IfClause;
197	class ForClause;
198
199	enum class LoopClauseType;
200	class LoopClause;
201	class CaseClause;
202	class FunctionDefine;
203};
204
205class Command::SimpleCommand : public Command
206{
207      public:
208	SimpleCommand();
209	std::vector<std::unique_ptr<Assignment>> assignments;
210
211	std::unique_ptr<Word> name = nullptr;
212	std::vector<std::unique_ptr<Word>> arguments;
213
214	// here_documents need been completed after full command list parse end
215	std::vector<std::shared_ptr<IORedirect>> io_redirects;
216};
217
218class Command::BraceGroup : public Command
219{
220      public:
221	BraceGroup(std::vector<std::unique_ptr<CommandList>> body);
222	BraceGroup(std::vector<std::unique_ptr<CommandList>> body,
223		   Position lbrace_pos, Position rbrace_pos);
224
225	std::vector<std::unique_ptr<CommandList>> body;
226	Position lbrace_pos, rbrace_pos;
227};
228class Command::SubShell : public Command
229{
230      public:
231	SubShell(std::vector<std::unique_ptr<CommandList>> body,
232		 Position lparen_pos, Position rparen_pos);
233
234	std::vector<std::unique_ptr<CommandList>> body;
235	Position lparen_pos, rparen_pos;
236};
237
238class Command::IfClause : public Command
239{
240      public:
241	IfClause(std::vector<std::unique_ptr<CommandList>> cond,
242		 std::vector<std::unique_ptr<CommandList>> body,
243		 std::unique_ptr<Command> else_part);
244
245	std::vector<std::unique_ptr<CommandList>> condition;
246	std::vector<std::unique_ptr<CommandList>> body;
247	std::unique_ptr<Command> else_part; // could be IfCluase for "elif"
248
249	Range if_range;
250	Range then_range, fi_range;
251	Range else_range;
252};
253class Command::ForClause : public Command
254{
255      public:
256	ForClause(bool in, std::vector<std::unique_ptr<ast::Word>> wordList,
257		  std::vector<std::unique_ptr<ast::CommandList>> body);
258	bool in;
259	std::vector<std::unique_ptr<ast::Word>> wordList;
260	std::vector<std::unique_ptr<ast::CommandList>> body;
261
262	// TODO: word ranges
263	Range for_range, name_range, do_range, done_range;
264	Range in_range;
265};
266
267enum class Command::LoopClauseType {
268	loop_while,
269	loop_until,
270};
271
272class Command::LoopClause : public Command
273{
274      public:
275	LoopClause(LoopClauseType type,
276		   std::vector<std::unique_ptr<CommandList>> condition,
277		   std::vector<std::unique_ptr<CommandList>> body);
278
279	LoopClauseType type;
280	std::vector<std::unique_ptr<CommandList>> condition;
281	std::vector<std::unique_ptr<CommandList>> body;
282
283	ast::Range while_until_range; // for `while` or `until`
284	ast::Range do_range, done_range;
285};
286class Command::CaseClause : public Command
287{
288      public:
289	CaseClause(std::unique_ptr<Word> word,
290		   std::vector<std::unique_ptr<struct CaseItem>> items);
291
292	std::unique_ptr<Word> word;
293	std::vector<std::unique_ptr<struct CaseItem>> items;
294
295	ast::Range case_range{}, in_range{}, esac_range{};
296};
297
298struct CaseItem {
299	std::vector<std::unique_ptr<Word>> patterns;
300	std::vector<std::unique_ptr<ast::CommandList>> body;
301
302	ast::Position lparen_pos, rparen_pos;
303	ast::Range dsemi_range;
304};
305
306class Command::FunctionDefine : public Command
307{
308      public:
309	FunctionDefine(std::string name, std::unique_ptr<ast::Command> command);
310	FunctionDefine(
311	    std::string name, std::unique_ptr<ast::Command> command,
312	    std::vector<std::shared_ptr<ast::IORedirect>> io_redirects);
313	std::string name;
314
315	std::unique_ptr<ast::Command> command;
316	std::vector<std::shared_ptr<ast::IORedirect>> io_redirects;
317
318	ast::Range name_range;
319	ast::Position lparen_pos, rparen_pos;
320};
321
/** Discriminator for the nested `AndOrList` subclasses. */
enum class AndOrListType {
	pipeline,
	bin_op,
};
323class AndOrList : public Node
324{
325      public:
326	AndOrList(AndOrListType type);
327	const AndOrListType type;
328
329	enum class BinOpType;
330	class BinOp;
331	class Pipeline;
332};
333class AndOrList::Pipeline : public AndOrList
334{
335      public:
336	Pipeline(std::vector<std::unique_ptr<Command>> cmds, bool bang,
337		 Position bangPos);
338
339	std::vector<std::unique_ptr<Command>> commands;
340	bool bang;
341	Position bangPos;
342};
343
344/**
345 * A binary operation is a type of AND-OR list which consists of multiple
346 * pipelines separated by `&&` or `||`.
347 */
348enum class AndOrList::BinOpType { op_and, op_or };
349class AndOrList::BinOp : public AndOrList
350{
351      public:
352	BinOp(BinOpType type, std::unique_ptr<AndOrList> left,
353	      std::unique_ptr<AndOrList> right, Range opRange);
354	const BinOpType type;
355
356	std::unique_ptr<AndOrList> left, right;
357	Range opRange;
358};
359
360/**
361 * A command list contains AND-OR lists separated by `;` (for sequential
362 * execution) or `&` (for asynchronous execution).
363 */
364class CommandList : public Node
365{
366      public:
367	CommandList(std::unique_ptr<AndOrList> and_or, bool ampersand);
368	CommandList(std::unique_ptr<AndOrList> and_or, bool ampersand,
369		    ast::Position sep_pos);
370	std::unique_ptr<AndOrList> and_or_list;
371	const bool ampersand; // whether the command list ends with `&`
372
373	ast::Position separator_pos{};
374};
375
376class Program : public Node
377{
378      public:
379	Program(std::vector<std::unique_ptr<CommandList>> body);
380
381      private:
382	std::vector<std::unique_ptr<CommandList>> body;
383};
384
enum class IORedirectOp;
class IORedirect;
class Assignment;

/**
 * Operator of an `IORedirect`.
 *
 * NOTE(review): the operator spellings in the comments below are inferred
 * from the enumerator names -- confirm against the lexer.
 */
enum class IORedirectOp {
	less, // presumably `<`
	// NOTE(review): spelled `greate` (not `great`); left as is because
	// callers refer to this name. Presumably `>`.
	greate,
	clobber,	  // presumably `>|`
	double_great,	  // presumably `>>`
	less_and,	  // presumably `<&`
	great_and,	  // presumably `>&`
	less_great,	  // presumably `<>`
	double_less,	  // presumably `<<`
	double_less_dash, // presumably `<<-`
};
399class IORedirect
400{
401      public:
402	IORedirect(int io_num, IORedirectOp op, std::unique_ptr<Word> name,
403		   ast::Position ionum_pos, ast::Range op_range);
404	int io_number;
405	IORedirectOp op;
406	std::unique_ptr<Word> name;
407	std::vector<std::unique_ptr<Word>> here_document;
408
409	ast::Position io_number_pos{};
410	ast::Range op_range{};
411};
412class Assignment
413{
414      public:
415	Assignment(std::string name, std::unique_ptr<Word> value,
416		   ast::Range name_range, ast::Position equal_pos);
417	std::string name;
418	std::unique_ptr<Word> value;
419
420	ast::Range name_range;
421	ast::Position equal_pos;
422};
423} // namespace ast