You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

279 lines
7.9 KiB

  1. /*!! Parser */
  2. /*!
  3. # Parser
  4. In order to parse simple while programs we use a
  5. [Recursive descent parser](https://en.wikipedia.org/wiki/Recursive_descent_parser). The syntax of our while programs
  6. is defined by the following grammar in
  7. [Extended Backus-Naur Form (EBNF)](https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form):
  8. Prog = Id ":=" Expr |
  9. Prog ";" Prog |
  10. "if" "(" Expr ")" "then" "{" Prog "}" "else" "{" Prog "}" |
  11. "while" "(" Expr ")" "{" Prog "}"
  12. Expr = Expr "+" Atom |
  13. Expr "-" Atom |
  14. Atom
  15. Atom = Id | Num | "(" Expr ")"
  16. The non-terminal `Num` can be derived into an arbitrary integer. `Id` can be derived into an arbitrary identifier
  17. consisting of the lower case characters from `a` to `z`.
  18. Our parser takes the source code as argument and returns a `Program` object.
  19. */
  20. /*!- Header */
  21. package parser;
  22. import expression.*;
  23. import expression.Int;
  24. import program.*;
  25. import java.util.ArrayList;
  26. import java.util.List;
  27. /*!
  28. `Parser` provides a constructor which takes the source code as argument. The created object provides the method
  29. `parse` which returns the parsed `Program` object.
  30. Parser parser = new Parser("a := 1");
  31. Program program = parser.parse();
  32. */
  33. public class Parser {
  34. /*!
  35. The instance variable `input` contains the source code that should be parsed and `position` contains the current
  36. position of the parser in the `input` string. The following parsing methods each consider the characters of the
  37. `input` starting at `position`, e.g. `input.charAt(position)`. After consuming characters of the input the methods
  38. increment the `position`.
  39. */
  40. int position;
  41. final String input;
  42. public Parser(String input) {
  43. this.input = input;
  44. }
  45. public Program parse() {
  46. position = 0;
  47. Program program = program();
  48. whitespace();
  49. if (position < input.length()) {
  50. throw new SyntaxException("End of input", position);
  51. }
  52. return program;
  53. }
  54. Program program() {
  55. Program firstStatement = statement();
  56. List<Program> moreStatements = new ArrayList<Program>();
  57. while (test(";")) {
  58. consume(";");
  59. Program statement = statement();
  60. moreStatements.add(statement);
  61. }
  62. Program program = firstStatement;
  63. for (Program statement: moreStatements) {
  64. program = new Composition(program, statement);
  65. }
  66. return program;
  67. }
  68. Program statement() {
  69. int start = position;
  70. Program statement;
  71. try {
  72. statement = assignment();
  73. } catch (SyntaxException se) {
  74. position = start;
  75. try {
  76. statement = conditional();
  77. } catch (SyntaxException se2) {
  78. position = start;
  79. statement = loop();
  80. }
  81. }
  82. return statement;
  83. }
  84. Program loop() {
  85. consume("while");
  86. consume("(");
  87. Expression condition = expression();
  88. consume(")");
  89. consume("{");
  90. Program program = program();
  91. consume("}");
  92. return new Loop(condition, program);
  93. }
  94. Program conditional() {
  95. consume("if");
  96. consume("(");
  97. Expression condition = expression();
  98. consume(")");
  99. consume("then");
  100. consume("{");
  101. Program thenCase = program();
  102. consume("}");
  103. consume("else");
  104. consume("{");
  105. Program elseCase = program();
  106. consume("}");
  107. return new Conditional(condition, thenCase, elseCase);
  108. }
  109. Program assignment() {
  110. Identifier identifier = identifier();
  111. consume(":=");
  112. Expression expression = expression();
  113. return new Assignment(identifier, expression);
  114. }
  115. private static class OperatorWithExpression {
  116. private final Operator operator;
  117. private final Expression expression;
  118. OperatorWithExpression(Operator operator, Expression expression) {
  119. this.operator = operator;
  120. this.expression = expression;
  121. }
  122. }
  123. private enum Operator { PLUS, MINUS }
  124. private boolean testOperator() {
  125. int start = position;
  126. boolean result;
  127. try {
  128. operator();
  129. result = true;
  130. } catch (SyntaxException se) {
  131. result = false;
  132. }
  133. position = start;
  134. return result;
  135. }
  136. private Operator operator() {
  137. whitespace();
  138. char next = (char) 0;
  139. if (position < input.length()) {
  140. next = input.charAt(position);
  141. position += 1;
  142. }
  143. if (next == '+') {
  144. return Operator.PLUS;
  145. } else if (next == '-') {
  146. return Operator.MINUS;
  147. } else {
  148. throw new SyntaxException("Operator", position);
  149. }
  150. }
  151. Expression expression() {
  152. Expression firstAtom = atom();
  153. List<OperatorWithExpression> moreAtoms = new ArrayList<OperatorWithExpression>();
  154. while(testOperator()) {
  155. Operator operator = operator();
  156. Expression expression = atom();
  157. moreAtoms.add(new OperatorWithExpression(operator, expression));
  158. }
  159. Expression expression = firstAtom;
  160. for (OperatorWithExpression atom: moreAtoms) {
  161. switch (atom.operator) {
  162. case PLUS:
  163. expression = new Addition(expression, atom.expression);
  164. break;
  165. case MINUS:
  166. expression = new Subtraction(expression, atom.expression);
  167. break;
  168. }
  169. }
  170. return expression;
  171. }
  172. Expression atom() {
  173. int start = position;
  174. Expression result;
  175. try {
  176. consume("(");
  177. result = expression();
  178. consume(")");
  179. } catch (SyntaxException se) {
  180. position = start;
  181. try {
  182. result = integer();
  183. } catch (SyntaxException se2) {
  184. result = identifier();
  185. }
  186. }
  187. return result;
  188. }
  189. private boolean isLowerLetter(char ch) {
  190. return ch >= 'a' && ch <= 'z';
  191. }
  192. Expression integer() {
  193. whitespace();
  194. int start = position;
  195. boolean minus = position < input.length() && input.charAt(position) == '-';
  196. if (minus) {
  197. position += 1;
  198. }
  199. boolean digitsFound = false;
  200. while (position < input.length() && Character.isDigit(input.charAt(position))) {
  201. position += 1;
  202. digitsFound = true;
  203. }
  204. if (digitsFound) {
  205. return new Int(Integer.parseInt(input.substring(start, position)));
  206. } else {
  207. throw new SyntaxException("Integer", position);
  208. }
  209. }
  210. Identifier identifier() {
  211. whitespace();
  212. int start = position;
  213. while (position < input.length() && isLowerLetter(input.charAt(position))) {
  214. position += 1;
  215. }
  216. if (position > start) {
  217. return new Identifier(input.substring(start, position));
  218. } else {
  219. throw new SyntaxException("Identifier", position);
  220. }
  221. }
  222. private void whitespace() {
  223. while(position < input.length() && Character.isWhitespace(input.charAt(position))) {
  224. position += 1;
  225. }
  226. }
  227. private void consume(String token) {
  228. whitespace();
  229. if (position + token.length() <= input.length() && input.substring(position, position + token.length()).equals(token)) {
  230. position += token.length();
  231. } else {
  232. throw new SyntaxException(token, position);
  233. }
  234. }
  235. private boolean test(String token) {
  236. int start = position;
  237. boolean success;
  238. try {
  239. consume(token);
  240. success = true;
  241. } catch (SyntaxException se) {
  242. success = false;
  243. }
  244. position = start;
  245. return success;
  246. }
  247. }