MIXAL
parser.cpp
Go to the documentation of this file.
1 #include <cassert>
2 #include <iostream>
3 #include "parser.h"
4 #include "instructions.h"
5 
11 namespace mixal {
12 
/**
 * States of the character-driven state machine used by Parser::parseLine.
 *
 * The enumerators are listed in the order in which the parts of a line are
 * met: location, operation, address, index, field and finally the comment.
 */
enum class ParseState {
    START,           ///< First character of a line that may begin with a location.
    LOC,             ///< Inside the location symbol.
    BEFORE_OP,       ///< Skipping spaces in front of the operation.
    OP,              ///< Inside the operation mnemonic.
    BEFORE_ADDRESS,  ///< Skipping spaces in front of the address.
    ADDRESS,         ///< Inside the address expression.
    BEFORE_INDEX,    ///< Right after ',' - the index must start here.
    INDEX,           ///< Inside the index expression.
    FIELD_OPEN,      ///< Right after '(' - the field must start here.
    FIELD,           ///< Inside the field expression, up to ')'.
    FIELD_CLOSE,     ///< Right after ')'.
    BEFORE_COMMENT,  ///< Skipping spaces in front of a trailing comment.
    COMMENT,         ///< Inside the comment, which runs to the end of the line.
    END              ///< The end of the line has been reached.
};
35 
36 std::ostream& operator<<(std::ostream& os, ParsedType c) {
37  switch (c) {
38  case ParsedType::EMPTY: os << "EMPTY"; break;
39  case ParsedType::INSTRUCTION: os << "INSTRUCTION"; break;
40  case ParsedType::PSEUDO: os << "PSEUDO"; break;
41  }
42  return os;
43 }
44 
45 bool ParsedResult::evaluate(const std::unordered_map<std::string, AtomicValue>& constants) {
46  if (rawAddress.length() > 0 && !evaluateAddress(constants)) {
47  return false;
48  }
49  if (rawIndex.length() > 0 && !evaluateIndex(constants)) {
50  return false;
51  }
52  if (rawField.length() > 0 && !evaluateField(constants)) {
53  return false;
54  }
55  return true;
56 }
57 
58 bool ParsedResult::evaluateAddress(const std::unordered_map<std::string, AtomicValue>& constants, int32_t index) {
59  if (!address.evaluated() && !address.evaluate(constants)) {
60  return false;
61  }
62  int32_t value = address.result().value;
63  if (parsedType == ParsedType::INSTRUCTION && !address.literalConstant() && std::abs(value) >= 4096) {
64  throw ParseError(index, "Address can not be represented in 2 bytes: " + std::to_string(value));
65  }
66  word.setAddress(address.result().negative, static_cast<uint16_t>(std::abs(value)));
67  return true;
68 }
69 
70 bool ParsedResult::evaluateIndex(const std::unordered_map<std::string, AtomicValue>& constants, int32_t column) {
71  if (!index.evaluated() && !index.evaluate(constants)) {
72  return false;
73  }
74  int32_t value = index.result().value;
75  if (value < 0 || 6 < value) {
76  throw ParseError(column, "Invalid index value: " + std::to_string(value));
77  }
78  word.setIndex(static_cast<uint8_t>(value));
79  return true;
80 }
81 
82 bool ParsedResult::evaluateField(const std::unordered_map<std::string, AtomicValue>& constants, int32_t index) {
83  if (!field.evaluated() && !field.evaluate(constants)) {
84  return false;
85  }
86  int32_t value = field.result().value;
87  int32_t defaultField = Instructions::getDefaultField(operation);
88  if (defaultField >= 0 && value != defaultField) {
89  throw ParseError(index, "The given field value does not match the default one: " +
90  std::to_string(value) + " != " + std::to_string(defaultField));
91  }
92  if (value < 0 || 64 <= value) {
93  throw ParseError(index, "Invalid field value: " + std::to_string(value));
94  }
95  word.setField(static_cast<uint8_t>(value));
96  return true;
97 }
98 
100  if (rawAddress.length() > 0 && !address.evaluated()) {
101  return false;
102  }
103  if (rawIndex.length() > 0 && !index.evaluated()) {
104  return false;
105  }
106  if (rawField.length() > 0 && !field.evaluated()) {
107  return false;
108  }
109  return true;
110 }
111 
112 std::ostream& operator<<(std::ostream& out, const ParsedResult& result) {
113  if (result.location.evaluated()) {
114  out << result.location.result().value << '\t';
115  } else {
116  out << result.rawLocation << '\t';
117  }
118  out << result.operation << '\t';
119  if (!result.rawAddress.empty()) {
120  if (result.address.evaluated()) {
121  out << result.address.result().value;
122  } else {
123  out << result.address;
124  }
125  }
126  if (!result.rawIndex.empty()) {
127  if (result.index.evaluated()) {
128  out << ',' << result.index.result().value;
129  } else {
130  out << ',' << result.index;
131  }
132  }
133  if (!result.rawField.empty()) {
134  if (result.field.evaluated()) {
135  out << '(' << result.field.result().value << ')';
136  } else {
137  out << '(' << result.field << ')';
138  }
139  }
140  return out;
141 }
142 
143 ParsedResult Parser::parseLine(const std::string& line, const std::string& lineSymbol, bool hasLocation) {
144  const char END_CHAR = '#';
145  const int INIT_INDEX = -1;
146  ParsedResult result;
147  result.word.setField(5); // For most of the operations, the default field value is (0:5) = 5.
148  result.parsedType = ParsedType::INSTRUCTION;
149  auto state = hasLocation ? ParseState::START : ParseState::BEFORE_OP;
150  int locationStart = INIT_INDEX,
151  operationStart = INIT_INDEX,
152  addressStart = INIT_INDEX,
153  indexStart = INIT_INDEX,
154  fieldStart = INIT_INDEX,
155  commentStart = INIT_INDEX,
156  defaultField = INIT_INDEX;
157  std::unordered_map<std::string, AtomicValue> emptyDict;
158  for (int i = 0; i <= static_cast<int>(line.size()); ++i) {
159  char ch = i < static_cast<int>(line.size()) ? line[i] : END_CHAR;
160  switch (state) {
161  case ParseState::START:
162  if (ch == ' ') {
163  // This line does not have the location name.
164  state = ParseState::BEFORE_OP;
165  } else if (ch == '*') {
166  // This line only contains comments.
167  state = ParseState::COMMENT;
168  result.parsedType = ParsedType::EMPTY;
169  commentStart = i;
170  } else if (ch == END_CHAR) {
171  state = ParseState::END;
172  result.parsedType = ParsedType::EMPTY;
173  } else if (isalnum(ch)) {
174  // A valid character in location.
175  state = ParseState::LOC;
176  locationStart = i;
177  } else {
178  throw ParseError(i, "Unexpected character encountered while parsing location: " + std::string(1, ch));
179  }
180  break;
181 
182  case ParseState::LOC:
183  if (ch == ' ') {
184  state = ParseState::BEFORE_OP;
185  result.rawLocation = line.substr(locationStart, i - locationStart);
186  } else if (!isalnum(ch)) {
187  throw ParseError(i, "Unexpected character encountered while parsing location: " + std::string(1, ch));
188  }
189  break;
190 
191  case ParseState::BEFORE_OP:
192  if (ch == ' ') {
193  continue;
194  } else if (ch == END_CHAR) {
195  if (locationStart != INIT_INDEX) {
196  throw ParseError(i, "No operation found after location");
197  }
198  state = ParseState::END;
199  result.parsedType = ParsedType::EMPTY;
200  } else if (isalnum(ch)) {
201  state = ParseState::OP;
202  operationStart = i;
203  } else {
204  throw ParseError(i, "Unexpected character encountered while finding operation: " + std::string(1, ch));
205  }
206  break;
207 
208  case ParseState::OP:
209  if (ch == ' ' || ch == END_CHAR) {
210  result.operation = line.substr(operationStart, i - operationStart);
211  int32_t operation = static_cast<int>(Instructions::getInstructionCode(result.operation));
212  if (ch == ' ') {
213  if (Instructions::hasArguments(static_cast<Instructions::Code>(operation))) {
214  state = ParseState::BEFORE_ADDRESS;
215  } else {
216  state = ParseState::BEFORE_COMMENT;
217  }
218  } else {
219  state = ParseState::END;
220  }
221  if (operation == Instructions::INVALID) {
222  throw ParseError(i, "Unknown operation: " + result.operation);
223  } else if (operation <= Instructions::LAST) {
224  result.word.setOperation(static_cast<uint8_t>(operation));
225  defaultField = Instructions::getDefaultField(result.operation);
226  } else {
227  result.parsedType = ParsedType::PSEUDO;
228  result.word.setOperation(static_cast<uint8_t>(operation - Instructions::PSEUDO));
229  if (operation == Instructions::ALF) {
230  // The "address" in ALF starts exactly two characters after the operation.
231  result.rawAddress = " ";
232  ++i;
233  for (int shift = 0; shift < 5 && i < static_cast<int>(line.size()); ++shift) {
234  result.rawAddress[shift] = line[++i];
235  }
236  int32_t charsValue = ComputerWord(result.rawAddress).value();
237  result.address = Expression::getConstExpression(AtomicValue(charsValue));
238  if (i < static_cast<int>(line.size())) {
239  state = ParseState::BEFORE_COMMENT;
240  } else {
241  state = ParseState::END;
242  }
243  }
244  }
245  } else if (!isalnum(ch)) {
246  throw ParseError(i, "Unexpected character encountered while parsing operation: " + std::string(1, ch));
247  }
248  break;
249 
250  case ParseState::BEFORE_ADDRESS:
251  if (ch == ' ') {
252  continue;
253  } else if (ch == END_CHAR) {
254  state = ParseState::END;
255  } else if (Expression::isValidFirst(ch)) {
256  state = ParseState::ADDRESS;
257  addressStart = i;
258  } else {
259  throw ParseError(i, "Unexpected character encountered while finding address: " + std::string(1, ch));
260  }
261  break;
262 
263  case ParseState::ADDRESS:
264  if (ch == ' ' || ch == ',' || ch == '(' || ch == END_CHAR) {
265  if (ch == ' ') {
266  state = ParseState::BEFORE_COMMENT;
267  } else if (ch == ',') {
268  state = ParseState::BEFORE_INDEX;
269  } else if (ch == '(') {
270  state = ParseState::FIELD_OPEN;
271  } else {
272  state = ParseState::END;
273  }
274  result.rawAddress = line.substr(addressStart, i - addressStart);
275  try {
276  result.address.parse(result.rawAddress, lineSymbol);
277  } catch (const ExpressionError& e) {
278  throw ParseError(addressStart + e.index(), e.what());
279  }
280  result.evaluateAddress(emptyDict);
281  } else if (!Expression::isValidChar(ch)) {
282  throw ParseError(i, "Unexpected character encountered while parsing address: " + std::string(1, ch));
283  }
284  break;
285 
286  case ParseState::BEFORE_INDEX:
287  if (Expression::isValidFirst(ch)) {
288  state = ParseState::INDEX;
289  indexStart = i;
290  } else if (ch == END_CHAR) {
291  throw ParseError(i, "No index found after comma");
292  } else {
293  throw ParseError(i, "Unexpected character encountered while finding index: " + std::string(1, ch));
294  }
295  break;
296 
297  case ParseState::INDEX:
298  if (ch == ' ' || ch == '(' || ch == END_CHAR) {
299  if (ch == ' ') {
300  state = ParseState::BEFORE_COMMENT;
301  } else if (ch == '(') {
302  state = ParseState::FIELD_OPEN;
303  } else {
304  state = ParseState::END;
305  }
306  result.rawIndex = line.substr(indexStart, i - indexStart);
307  try {
308  result.index.parse(result.rawIndex, lineSymbol);
309  } catch (const ExpressionError& e) {
310  throw ParseError(addressStart + e.index(), e.what());
311  }
312  result.evaluateIndex(emptyDict);
313  } else if (!Expression::isValidChar(ch)) {
314  throw ParseError(i, "Unexpected character encountered while parsing index: " + std::string(1, ch));
315  }
316  break;
317 
318  case ParseState::FIELD_OPEN:
319  if (Expression::isValidFirst(ch)) {
320  state = ParseState::FIELD;
321  fieldStart = i;
322  } else {
323  throw ParseError(i, "Unexpected character encountered "
324  "while parsing modification: " + std::string(1, ch));
325  }
326  break;
327 
328  case ParseState::FIELD:
329  if (ch == ')') {
330  state = ParseState::FIELD_CLOSE;
331  result.rawField = line.substr(fieldStart, i - fieldStart);
332  try {
333  result.field.parse(result.rawField, lineSymbol);
334  } catch (const ExpressionError& e) {
335  throw ParseError(addressStart + e.index(), e.what());
336  }
337  result.evaluateField(emptyDict);
338  } else if (!Expression::isValidChar(ch)) {
339  throw ParseError(i, "Unexpected character encountered while parsing index: " + std::string(1, ch));
340  }
341  break;
342 
343  case ParseState::FIELD_CLOSE:
344  if (ch == ' ' || ch == END_CHAR) {
345  if (ch == ' ') {
346  state = ParseState::BEFORE_COMMENT;
347  } else {
348  state = ParseState::END;
349  }
350  } else {
351  throw ParseError(i, "Unexpected character encountered while parsing field: " + std::string(1, ch));
352  }
353  break;
354 
355  case ParseState::BEFORE_COMMENT:
356  if (ch == END_CHAR) {
357  state = ParseState::END;
358  } else if (ch != ' ') {
359  state = ParseState::COMMENT;
360  commentStart = i;
361  }
362  break;
363 
364  case ParseState::COMMENT:
365  if (ch == END_CHAR) {
366  state = ParseState::END;
367  result.comment = line.substr(commentStart, i - commentStart);
368  }
369  break;
370 
371  case ParseState::END:
372  break;
373  }
374  }
375  if (result.rawField.empty()) {
376  if (result.word.operation() == Instructions::MOVE) {
377  // The default field value for MOVE is 1, but it is not mandatory.
378  defaultField = 1;
379  } else if (result.word.operation() == Instructions::NOP) {
380  // The whole word is `+0`, but it is not mandatory.
381  defaultField = 0;
382  }
383  }
384  if (defaultField >= 0) {
385  result.word.setField(defaultField);
386  }
387  assert(state == ParseState::END);
388  return result;
389 }
390 
391 }; // namespace mixal
mixal::ComputerWord::setField
void setField(uint8_t field)
Definition: memory.h:162
mixal::ParsedType
ParsedType
Definition: parser.h:20
mixal::Expression::isValidFirst
static bool isValidFirst(char ch)
Definition: expression.cpp:46
mixal::ComputerWord
Definition: memory.h:25
mixal::Expression::isValidChar
static bool isValidChar(char ch)
Definition: expression.cpp:50
mixal::ParsedResult::field
Expression field
Definition: parser.h:43
mixal::ParsedResult::rawLocation
std::string rawLocation
Definition: parser.h:35
mixal::ComputerWord::setIndex
void setIndex(uint8_t index)
Definition: memory.h:160
mixal::Parser::parseLine
static ParsedResult parseLine(const std::string &line, const std::string &lineSymbol, bool hasLocation=true)
Definition: parser.cpp:143
mixal::ParsedResult::rawAddress
std::string rawAddress
Definition: parser.h:38
mixal::ParsedResult::word
ComputerWord word
Definition: parser.h:44
mixal::Expression::parse
void parse(const std::string &expression, const std::string &lineSymbol)
Definition: expression.cpp:89
mixal::ParsedResult::rawField
std::string rawField
Definition: parser.h:42
mixal::ParsedResult::comment
std::string comment
Definition: parser.h:45
mixal::Expression::evaluated
bool evaluated() const
Definition: expression.h:144
mixal::ComputerWord::setAddress
void setAddress(int16_t address)
Definition: memory.cpp:149
mixal::ComputerWord::value
int32_t value() const
Definition: memory.cpp:132
mixal::ParsedResult::index
Expression index
Definition: parser.h:41
mixal::Instructions::getDefaultField
static int getDefaultField(const std::string &name)
Definition: instructions.cpp:1222
mixal::Expression::evaluate
bool evaluate(const std::unordered_map< std::string, AtomicValue > &constants)
Definition: expression.cpp:233
mixal::Expression::getConstExpression
static Expression getConstExpression(const AtomicValue &value)
Definition: expression.cpp:23
mixal::ParsedResult
Definition: parser.h:32
mixal::Instructions::hasArguments
static bool hasArguments(Instructions::Code code)
Definition: instructions.cpp:5
mixal::ParsedResult::evaluated
bool evaluated() const
Definition: parser.cpp:99
mixal::ParsedResult::location
Expression location
Definition: parser.h:36
mixal::Expression::literalConstant
bool literalConstant() const
Definition: expression.h:149
instructions.h
Definitions of instructions.
mixal::ParsedResult::evaluate
bool evaluate(const std::unordered_map< std::string, AtomicValue > &constants)
Definition: parser.cpp:45
mixal::operator<<
std::ostream & operator<<(std::ostream &out, Operation operation)
Definition: expression.cpp:335
parser.h
Parse one line of code.
mixal::ExpressionError::index
int index() const
Definition: errors.h:21
mixal::ComputerWord::operation
uint8_t operation() const
Definition: memory.h:153
mixal::Instructions::getInstructionCode
static Instructions::Code getInstructionCode(const std::string &name)
Definition: instructions.cpp:9
mixal::AtomicValue
Definition: expression.h:75
mixal::ParsedResult::rawIndex
std::string rawIndex
Definition: parser.h:40
mixal::ParsedResult::parsedType
ParsedType parsedType
Definition: parser.h:34
mixal::ParsedResult::operation
std::string operation
Definition: parser.h:37
mixal::Expression::result
const AtomicValue & result() const
Definition: expression.h:146
mixal::ParseState
ParseState
Definition: parser.cpp:19
mixal::ParseError
Definition: errors.h:33
mixal::ComputerWord::setOperation
void setOperation(uint8_t operation)
Definition: memory.h:164
mixal::ParsedResult::address
Expression address
Definition: parser.h:39
mixal::ExpressionError
Definition: errors.h:15
mixal::ExpressionError::what
const char * what() const noexcept override
Definition: errors.h:24