buffalo
Loading...
Searching...
No Matches
buffalo.h
Go to the documentation of this file.
1#ifndef BUFFALO2_H
2#define BUFFALO2_H
3
4#include <algorithm>
5#include <cctype>
6#include <ctll.hpp>
7#include <ctre.hpp>
8#include <expected>
9#include <format>
10#include <list>
11#include <map>
12#include <optional>
13#include <ranges>
14#include <set>
15#include <stdexcept>
16#include <variant>
17#include <vector>
18#include <sstream>
19#include <memory>
20
21namespace bf
22{
23 /*
24 * Helper for std::visit provided by Andreas Fertig.
25 * https://andreasfertig.blog/2023/07/visiting-a-stdvariant-safely/
26 */
27 template<class...>
28 constexpr bool always_false_v = false;
29
30 template<class... Ts>
31 struct overload : Ts...
32 {
33 using Ts::operator()...;
34
35 template<typename T>
36 constexpr void operator()(T) const
37 {
38 static_assert(always_false_v<T>, "Unsupported type");
39 }
40 };
41
42 template<class... Ts>
43 overload(Ts...) -> overload<Ts...>;
44
48 struct Dummy {};
49
56 [[nodiscard]] inline std::string utf32_to_string(char32_t const *utf32, std::size_t size)
57 {
58 std::string buffer(size, '0');
59
60 for (std::size_t i = 0; i < size; i++)
61 {
62 buffer[i] = static_cast<char>(utf32[i]);
63 }
64
65 return std::move(buffer);
66 }
67
68 /*
69 * GRAMMAR DEFINITION
70 */
71 template<typename T>
72 concept IGrammar = requires(T)
73 {
74 typename T::ValueType;
75 typename T::UserDataType;
76 };
77
78 /*
79 * FORWARD-DECLS
80 */
81 template<IGrammar G>
82 class Grammar;
83
84 template<IGrammar G>
86
87 template<IGrammar G>
89
90 template<IGrammar G>
91 class ProductionRule;
92
93 template<IGrammar G>
95
96 template<IGrammar G>
97 class Terminal;
98
99 template<IGrammar G, ctll::fixed_string regex, typename SemanticType>
100 class DefineTerminal;
101
102 template<IGrammar G>
103 class NonTerminal;
104
105 template<IGrammar G, ctll::fixed_string DebugName, typename SemanticType>
106 class DefineNonTerminal;
107
108 template<IGrammar G>
109 struct LRItem;
110
111 template<IGrammar G>
112 struct LRState;
113
114 template<IGrammar G>
115 class Parser;
116
117 template<IGrammar G>
118 class SLRParser;
119
125 struct Location
126 {
127 std::string_view buffer;
128 std::size_t begin;
129 std::size_t end;
130
136 [[nodiscard]] std::string_view SnippetString(std::size_t padding = 10) const
137 {
138 std::size_t start = std::max(static_cast<std::size_t>(0), this->begin - padding);
139 std::size_t n = this->end - this->begin + (padding * 2);
140
141 return buffer.substr(start, n);
142 }
143 };
144
145 class Error : public std::exception
146 {
147 protected:
148 std::string message_;
149
150 public:
151 [[nodiscard]] char const *what() const noexcept override
152 {
153 return this->message_.c_str();
154 }
155
156 Error(std::string message) : message_(std::move(message)) {}
157 Error() = default;
158 };
159
160 template<IGrammar G>
162 {
163 static void AppendRule(std::stringstream &stream, ProductionRule<G> const *rule, int dot = -1)
164 {
165 stream << "\t" << rule->non_terminal_->GetName() << " -> ";
166
167 for (int i = 0; i < rule->sequence_.size(); i++)
168 {
169 if (i == dot)
170 {
171 stream << ". ";
172 }
173
174 char const *name = std::visit(overload{
175 [](Terminal<G> *terminal)
176 {
177 return terminal->GetName();
178 },
180 {
181 return non_terminal->GetName();
182 }
183 }, rule->sequence_[i]);
184
185 stream << name << " ";
186
187 if (dot == rule->sequence_.size() && i == rule->sequence_.size() - 1)
188 {
189 stream << ".";
190 }
191 }
192
193 stream << "\n";
194 }
195
196 public:
198 {
199 char const *reduce_name = reduce->non_terminal_->GetName();
200 char const *lookahead_name = lookahead->GetName();
201
202 std::stringstream message;
203 message << "Grammar contains an irreconcilable shift-reduce conflict when deciding to reduce " << reduce_name << " or shift " << lookahead_name << ".\n";
204 message << "Shift-reduce conflicts can be solved by:\n";
205 message << "\t1. Refactoring your grammar.\n";
206 message << "\t2. Adding precedence to your terminals.\n";
207 message << "\t3. Adding Associativity to your terminals.\n\n";
208 message << "The conflict arose in the following state with the following closure:\n";
209
210 for (LRItem<G> &item : state.GenerateClosure())
211 {
212 GrammarDefinitionError::AppendRule(message, item.rule, static_cast<int>(item.position));
213 }
214
215 message << "When deciding to reduce the following rule:\n";
216 GrammarDefinitionError::AppendRule(message, reduce);
217
218 message << "Or shift " << lookahead_name << " to the following state:\n";
219
220 for (LRItem<G> &item : shift.GenerateClosure())
221 {
222 GrammarDefinitionError::AppendRule(message, item.rule, static_cast<int>(item.position));
223 }
224
225 return {message.str()};
226 }
227
229 {
230 char const *a_name = a->non_terminal_->GetName();
231 char const *b_name = b->non_terminal_->GetName();
232
233 std::stringstream message;
234 message << "Grammar contains an irreconcilable reduce-reduce conflict between " << a_name << "/" << b_name << ".\n";
235 message << "Reduce-reduce conflicts are normally solved by refactoring your grammar.\n";
236 message << "The conflict arose in the following two rules:\n\n";
237
238 for (auto const rule : {a, b})
239 {
240 GrammarDefinitionError::AppendRule(message, rule);
241 }
242
243 message << "With lookahead " << lookahead->GetName() << "\n";
244
245 message << "In the state with the following closure:\n";
246 for (LRItem<G> &item : state.GenerateClosure())
247 {
248 GrammarDefinitionError::AppendRule(message, item.rule, static_cast<int>(item.position));
249 }
250
251 return {message.str()};
252 }
253
255 GrammarDefinitionError() : Error("Unknown Grammar Definition Error") {}
256 };
257
258 class ParsingError : public Error
259 {
260 Location location_;
261
262 public:
263 ParsingError(Location location, std::string_view message) : location_(location)
264 {
265 std::stringstream mstream;
266 mstream << message << "\n";
267 mstream << "\t" << location.SnippetString(10) << "\n";
268 mstream << "\t" << std::string(10, ' ') << '^' << std::string(location.end - location.begin, '~') << "\n";
269
270 this->message_ = mstream.str();
271 }
272 };
273
278 template<IGrammar G>
279 struct Token
280 {
282 std::string_view raw;
284
285 std::size_t Size() const
286 {
287 return this->location.end - this->location.begin;
288 }
289 };
290
295 template<IGrammar G>
297 {
298 std::string_view raw;
301
302 [[nodiscard]] operator typename G::ValueType()
303 {
304 return this->value;
305 }
306
307 ValueToken(std::string_view raw, Location location, typename G::ValueType &&value) : raw(raw), location(location), value(std::move(value)) {}
308 };
309
310 template<IGrammar G>
312 {
313 friend class ValueTokenReference<G>;
314 friend class ValueTokenAccessor<G>;
315 friend class SLRParser<G>;
316
317 std::vector<ValueToken<G>> values_;
318
319 public:
321 {
322 this->values_.reserve(100);
323 }
324 };
325
326 template<IGrammar G>
328 {
329 std::size_t index_;
330 std::shared_ptr<ValueTokenStore<G>> store_;
331
332 public:
334 {
335 return this->store_->values_[this->index_];
336 }
337
338 [[nodiscard]] typename G::ValueType &GetValue() const
339 {
340 return this->store_->values_[this->index_].value;
341 }
342
343 ValueTokenReference(std::size_t index, std::shared_ptr<ValueTokenStore<G>> store) : index_(index), store_(store) {}
344 };
345
346 template<IGrammar G>
348 {
349 std::span<std::size_t> indices_;
350 std::shared_ptr<ValueTokenStore<G>> store_;
351
352 std::string_view raw_;
353 Location location_;
354
355 public:
356 [[nodiscard]] typename G::ValueType &operator[](std::size_t i)
357 {
358 return this->store_->values_[this->indices_[i]].value;
359 }
360
362 {
363 return this->store_->values_[this->indices_[i]];
364 }
365
367 {
368 this->store_->values_.emplace_back(this->raw_, this->location_, std::move(value));
369
370 return *this;
371 }
372
373 ValueTokenAccessor(std::span<std::size_t> indices, std::shared_ptr<ValueTokenStore<G>> store) : indices_(indices), store_(store) {}
374 };
375
380 template<typename GValueType, typename GUserDataType = Dummy>
381 requires std::constructible_from<GValueType>
383 {
384 public:
385 /*
386 * TYPES
387 */
390 };
391
401
405 template<IGrammar G>
407 {
408 friend class Grammar<G>;
409
410 public:
411 using ReasonerType = typename G::ValueType(*)(Token<G> const&);
412
413 protected:
414 std::string name_ = "\"UNKNOWN\"";
415
416 inline static std::size_t counter_ = 0;
417
419
420 Terminal() = default;
421
422 public:
426
432 typename G::ValueType Reason(Token<G> const &token) const
433 {
434 if(this->reasoner_)
435 {
436 return this->reasoner_(token);
437 }
438
439 return {};
440 }
441
442 virtual std::optional<Token<G>> Lex(std::string_view input) const
443 {
444 return std::nullopt;
445 }
446
447 [[nodiscard]] char const *GetName() const noexcept
448 {
449 return this->name_.c_str();
450 }
451
452 Terminal(Terminal<G> &&) = delete;
453 Terminal(Terminal<G> const &) = delete;
454 };
455
460 template<IGrammar G, ctll::fixed_string regex, typename SemanticType = Dummy>
461 class DefineTerminal : public Terminal<G>
462 {
463 public:
465 {
466 return std::get<SemanticType>(value);
467 }
468
469 std::optional<Token<G>> Lex(std::string_view input) const override
470 {
471 auto match = ctre::starts_with<regex>(input);
472
473 if(!match)
474 {
475 return std::nullopt;
476 }
477
478 return Token<G> {
479 .terminal = (Terminal<G>*)this,
480 .raw = match.view(),
481 .location = {
482 .begin = 0,
483 .end = match.size(),
484 },
485 };
486 }
487
489 {
490 this->name_ = std::format("\"{}\"", utf32_to_string(regex.content, regex.size()));
491 this->associativity = assoc;
492 this->user_data = user_data;
493 this->reasoner_ = reasoner;
494 }
495
497
500
502 };
503
507 template<IGrammar G>
509 {
510 friend class Grammar<G>;
511 friend struct LRState<G>;
512
513 public:
515
516 protected:
517 std::string name_ = "unknown";
518 std::vector<ProductionRule<G>> rules_;
519
520 public:
521 [[nodiscard]] char const *GetName() const noexcept
522 {
523 return this->name_.c_str();
524 }
525
528
529 NonTerminal() = default;
530
531 NonTerminal(ProductionRule<G> const &rule, std::string name = "unknown") : rules_({rule}), name_(std::move(name)) {}
532 NonTerminal(ProductionRuleList<G> const &rule_list, std::string name = "unknown") : rules_(rule_list.rules), name_(std::move(name)) {}
533 };
534
538 template<IGrammar G, ctll::fixed_string DebugName = "Generic", typename SemanticValue = Dummy>
555
559 template<IGrammar G>
560 using Symbol = std::variant<Terminal<G>*, NonTerminal<G>*>;
561
562 template<IGrammar G>
564 {
565 std::set<Terminal<G>*> short_circuit{};
566
568 {
569 return this->short_circuit == other.short_circuit;
570 }
571
573 {
574 this->short_circuit.insert(other.short_circuit.begin(), other.short_circuit.end());
575
576 return *this;
577 }
578 };
579
580 /*
581 * PRODUCTION RULES
582 */
583 template<IGrammar G>
585 {
586 friend class Grammar<G>;
587 friend struct LRItem<G>;
588 friend class Parser<G>;
589 friend class SLRParser<G>;
590 friend class GrammarDefinitionError<G>;
591
592 protected:
594 std::vector<Symbol<G>> sequence_;
595 std::size_t precedence = -1;
596
598
600
601 public:
603 {
604 this->sequence_.push_back(&rhs);
605
606 return *this;
607 }
608
610 {
611 this->sequence_.push_back(&rhs);
612
613 return *this;
614 }
615
623 {
624 this->mod_.short_circuit.insert(&lookahead);
625
626 return *this;
627 }
628
630 {
631 this->transductor_ = tranductor;
632
633 return *this;
634 }
635
637 {
638 // NonTerminal
639 if(this->non_terminal_ != other.non_terminal_)
640 {
641 return false;
642 }
643
644 if (this->mod_ != other.mod_)
645 {
646 return false;
647 }
648
649 // Sequence
650 if(this->sequence_.size() != other.sequence_.size()) return false;
651 for(auto const &[first, second] : std::views::zip(this->sequence_, other.sequence_))
652 {
653 if(first != second) return false;
654 }
655
656 return true;
657 }
658
660 {
661 if(this->transductor_)
662 {
663 this->transductor_($);
664 }
665 // If no transductor is defined, return the first element in sequence.
666 else if (this->sequence_.size() > 0)
667 {
668 $ = $[0];
669 }
670 }
671
672 ProductionRule(Terminal<G> &terminal) : sequence_({ &terminal }) {}
674 };
675
679 template<IGrammar G>
681
686 template<IGrammar G>
688 {
689 std::vector<ProductionRule<G>> rules;
690
692 {
693 this->rules.push_back(rule);
694
695 return *this;
696 }
697 };
698
699 /*
700 * GRAMMAR
701 */
702 template<IGrammar G>
704 {
705 friend class Parser<G>;
706 friend class SLRParser<G>;
707
708 protected:
713 std::unique_ptr<Terminal<G>> EOS;
714
715 std::set<NonTerminal<G>*> nonterminals_;
716 std::set<Terminal<G>*> terminals_;
717
718 std::map<NonTerminal<G>*, std::set<Terminal<G>*>> first_;
719 std::map<NonTerminal<G>*, std::set<Terminal<G>*>> follow_;
720
722 std::vector<std::pair<NonTerminal<G>*, ProductionRule<G>>> production_rules_;
723
724 public:
726
728 {
729 return this->nonterminals_.contains(&non_terminal);
730 }
731
733 {
734 return this->follow_.at(&non_terminal).contains(&terminal);
735 }
736
738 {
739 return this->first_.at(&non_terminal).contains(&terminal);
740 }
741
747 {
748 bool has_change;
749 do
750 {
751 has_change = false;
752
753 for(auto const &[nonterminal, rule] : this->production_rules_)
754 {
755 if(rule.sequence_.empty()) continue;
756
757 has_change |= std::visit(overload{
758 [&](Terminal<G> *terminal)
759 {
760 auto [it, inserted] = this->first_[nonterminal].insert(terminal);
761 return inserted;
762 },
764 {
766 {
767 return false;
768 }
769
770 auto &parent_first = this->first_[nonterminal];
771 auto &child_first = this->first_[child_nonterminal];
772
773 std::size_t parent_size = parent_first.size();
774 parent_first.insert(child_first.begin(), child_first.end());
775
776 return parent_size != parent_first.size();
777 }
778 }, rule.sequence_[0]);
779 }
780 } while(has_change);
781 }
782
788 {
789 this->follow_[&root] = { this->EOS.get() };
790
791 bool has_change;
792 do
793 {
794 has_change = false;
795
796 for(auto &[nonterminal, rule] : this->production_rules_)
797 {
798 for(int i = 0; i < rule.sequence_.size(); i++)
799 {
800 // Skip over Terminals
801 if(std::holds_alternative<Terminal<G>*>(rule.sequence_[i])) continue;
802
803 // Process NonTerminal
804 auto symbol = std::get<NonTerminal<G>*>(rule.sequence_[i]);
805
806 // If this is the last NonTerminal in the sequence, then it gets all of the FOLLOW of parent.
807 if(i == rule.sequence_.size() - 1)
808 {
809 auto &parent_follow = this->follow_[nonterminal];
810 auto &child_follow = this->follow_[symbol];
811
812 std::size_t child_size = child_follow.size();
813 child_follow.insert(parent_follow.begin(), parent_follow.end());
814
816 continue;
817 }
818
819 // ELSE process the next token
820 auto follow = rule.sequence_[i + 1];
821
822 has_change |= std::visit(overload{
823 [&](Terminal<G> *terminal)
824 {
825 auto [it, inserted] = this->follow_[symbol].insert(terminal);
826 return inserted;
827 },
829 {
830 auto &symbol_follow = this->follow_[symbol];
831 auto &follow_first = this->first_[follow_nonterminal];
832
833 std::size_t symbol_size = symbol_follow.size();
834 symbol_follow.insert(follow_first.begin(), follow_first.end());
835
836 return symbol_size != symbol_follow.size();
837 }
838 }, follow);
839 }
840 }
841 } while(has_change);
842 }
843
845 {
846 this->nonterminals_.insert(nonterminal);
847
848 for(auto &rule : nonterminal->rules_)
849 {
851 Terminal<G> *last_terminal = nullptr;
852
853 this->production_rules_.push_back({nonterminal, rule});
854
855 for(auto &symbol : rule.sequence_)
856 {
857 std::visit(overload{
858 [&](Terminal<G> *terminal)
859 {
860 last_terminal = terminal;
861 this->terminals_.insert(terminal);
862 },
864 {
865 if(this->nonterminals_.contains(child_nonterminal)) return;
866
868 }
869 }, symbol);
870 }
871
872 // Rule precedence defaults to the precedence of the LAST terminal in sequence.
873 if(last_terminal)
874 {
876 }
877 }
878 }
879
881 {
882 this->EOS = std::make_unique<DefineTerminal<G, R"(\Z)">>();
883 this->terminals_.insert(this->EOS.get());
884
885 this->RegisterSymbols(&start);
886
887 this->GenerateFirstSet();
888 this->GenerateFollowSet();
889 }
890
891 Grammar() = delete;
892 };
893
894 /*
895 * PRODUCTION RULE COMPOSITION FUNCTIONS
896 */
897 template<IGrammar G>
902
903 template<IGrammar G>
908
909 template<IGrammar G>
914
915 template<IGrammar G>
920
921 template<IGrammar G>
926
931 template<IGrammar G>
932 struct LRItem
933 {
936 std::size_t position;
937
938 [[nodiscard]] bool Complete() const
939 {
940 return this->position >= this->rule->sequence_.size();
941 }
942
944 {
945 return LRItem(this->rule, this->position + 1);
946 }
947
949 {
950 return this->rule->sequence_[this->position];
951 }
952
953 bool operator==(LRItem const &other) const
954 {
955 return *this->rule == *other.rule && this->mod == other.mod && this->position == other.position;
956 }
957
959
960 LRItem() = delete;
961 };
962
963 using lrstate_id_t = std::size_t;
964
969 template<IGrammar G>
970 struct LRState
971 {
972 std::vector<LRItem<G>> kernel_items;
973
974 std::vector<LRItem<G>> GenerateClosure() const
975 {
976 std::vector<LRItem<G>> closure = this->kernel_items;
977 std::set<NonTerminal<G>*> closed_nonterminals;
978
979 for(int i = 0; i < closure.size(); i++)
980 {
981 if(closure[i].Complete()) continue;
982
983 std::visit(overload{
984 [&](Terminal<G> *terminal)
985 {
986 // Remove this rule if it is present it item's short-circuit list
987 if (closure[i].mod.short_circuit.contains(terminal))
988 {
989 closure.erase(closure.begin() + (i--));
990 }
991 },
993 {
994 if(closed_nonterminals.contains(non_terminal)) return;
996
997 for(auto const &rule : non_terminal->rules_)
998 {
999 auto &new_item = closure.emplace_back(&rule);
1000 new_item.mod += closure[i].mod;
1001 }
1002 }
1003 }, closure[i].NextSymbol());
1004 }
1005
1006 return closure;
1007 }
1008
1009 std::map<Symbol<G>, LRState<G>> GenerateTransitions() const
1010 {
1011 std::map<Symbol<G>, LRState<G>> transitions;
1012
1013 auto closure = this->GenerateClosure();
1014
1015 for(auto const &item : closure)
1016 {
1017 if (item.Complete()) continue;
1018
1019 transitions[item.NextSymbol()].kernel_items.push_back(item.Advance());
1020 }
1021
1022 return transitions;
1023 }
1024
1030 bool operator==(LRState const &other) const
1031 {
1032 if(this->kernel_items.size() != other.kernel_items.size()) return false;
1033
1034 for(int i = 0; i < this->kernel_items.size(); i++)
1035 {
1036 if(this->kernel_items[i] != other.kernel_items[i]) return false;
1037 }
1038
1039 return true;
1040 }
1041
1043 {
1044 for(auto const &rule : start->rules_)
1045 {
1046 this->kernel_items.emplace_back(&rule);
1047 }
1048 }
1049
1050 LRState() = default;
1051 };
1052
1056 enum class LRActionType
1057 {
1058 kError,
1059 kAccept,
1060 kShift,
1061 kReduce,
1062 };
1063
1068 template<IGrammar G>
1070 {
1072
1073 union
1074 {
1077 };
1078 };
1079
1084 template<IGrammar G>
1086 {
1087 public:
1088 virtual std::expected<ValueTokenReference<G>, Error> Parse(std::string_view input) const = 0;
1089
1090 virtual ~Parser() = default;
1091 };
1092
1097 template<IGrammar G>
1098 class SLRParser final : public Parser<G>
1099 {
1100 Grammar<G> grammar_;
1101
1102 std::map<lrstate_id_t, std::map<Terminal<G>*, LRAction<G>>> action_;
1103 std::map<lrstate_id_t, std::map<NonTerminal<G>*, lrstate_id_t>> goto_;
1104
1105 [[nodiscard]] LRAction<G> const &LookupAction(lrstate_id_t state, Terminal<G> *lookahead) const
1106 {
1107 return this->action_.at(state).at(lookahead);
1108 }
1109
1110 [[nodiscard]] lrstate_id_t LookupGoto(lrstate_id_t state, NonTerminal<G> *non_terminal) const
1111 {
1112 return this->goto_.at(state).at(non_terminal);
1113 }
1114
1115 struct Tokenizer
1116 {
1117 SLRParser<G> const &parser;
1118 std::string_view input;
1119 std::size_t index = 0;
1120
1121 std::optional<Token<G>> Next(lrstate_id_t state = 0)
1122 {
1123 while(this->index < this->input.size() && std::isspace(this->input[this->index])) this->index++;
1124
1125 for(auto terminal : this->parser.action_.at(state) | std::views::keys)
1126 {
1127 auto token = terminal->Lex(this->input.substr(this->index));
1128 if(token)
1129 {
1130 token->location.begin += this->index;
1131 token->location.end += this->index;
1132
1133 this->index += token->Size();
1134
1135 return token;
1136 }
1137 }
1138
1139 return std::nullopt;
1140 }
1141
1142 Tokenizer(SLRParser<G> const &parser, std::string_view input) : parser(parser), input(input) {}
1143 };
1144
1151 lrstate_id_t FindOrInsertLRState(std::vector<LRState<G>> &state_list, LRState<G> const &state)
1152 {
1153 auto it = std::ranges::find(state_list, state);
1154
1155 // Does not exist, add it in.
1156 if(it == state_list.end())
1157 {
1158 state_list.push_back(state);
1159 return state_list.size() - 1;
1160 }
1161
1162 // Otherwise return index.
1163 return std::distance(state_list.begin(), it);
1164 }
1165
1169 std::optional<Error> BuildParsingTables()
1170 {
1171 std::vector<LRState<G>> states{};
1172 states.emplace_back(&this->grammar_.root);
1173
1174 // Finite State Machine
1175 std::map<lrstate_id_t, std::map<Symbol<G>, lrstate_id_t>> fsm;
1176
1177 for(lrstate_id_t i = 0; i < states.size(); i++)
1178 {
1179 // Process all transitions
1180 for(auto const &[symbol, new_state] : states[i].GenerateTransitions())
1181 {
1182 lrstate_id_t new_state_id = this->FindOrInsertLRState(states, new_state);
1184
1185 // Create SHIFT/GOTO entries in parsing tables
1186 std::visit(overload{
1187 // Create ACTION
1188 [&](Terminal<G> *terminal)
1189 {
1190 this->action_[i][terminal] = {
1191 .type = LRActionType::kShift,
1192 .state = new_state_id,
1193 };
1194 },
1195
1196 // Create GOTO
1198 {
1199 this->goto_[i][non_terminal] = new_state_id;
1200 },
1201 }, symbol);
1202 }
1203
1204 // Create REDUCE/ACCEPT entries in parsing tables
1205 for(auto const &item : states[i].kernel_items)
1206 {
1207 if(!item.Complete()) continue;
1208
1209 for(auto follow_terminal : this->grammar_.follow_[item.rule->non_terminal_])
1210 {
1211 // There will be no conflict
1212 if (!this->action_[i].contains(follow_terminal))
1213 {
1214 this->action_[i][follow_terminal] = {
1215 .type = LRActionType::kReduce,
1216 .rule = item.rule,
1217 };
1218 continue;
1219 }
1220
1221 // There will be conflict (FIGHT!)
1222 LRAction<G> conflict = this->action_[i][follow_terminal];
1223 if (conflict.type == LRActionType::kShift) // SHIFT-REDUCE
1224 {
1225 // Reduce due to higher precedence
1227 {
1228 this->action_[i][follow_terminal] = {
1229 .type = LRActionType::kReduce,
1230 .rule = item.rule,
1231 };
1232 continue;
1233 }
1234
1235 // Shift due to lower precedence
1237 {
1238 continue;
1239 }
1240
1241 // Reduce due to associativity rule
1242 if(follow_terminal->associativity == Associativity::Left)
1243 {
1244 this->action_[i][follow_terminal] = {
1245 .type = LRActionType::kReduce,
1246 .rule = item.rule,
1247 };
1248 continue;
1249 }
1250
1251 // Shift due to associativity rule
1252 if(follow_terminal->associativity == Right)
1253 {
1254 continue;
1255 }
1256
1257 // Unable to resolve conflict
1259 }
1260 else if (conflict.type == LRActionType::kReduce) // REDUCE-REDUCE
1261 {
1263 }
1264 }
1265 }
1266 }
1267
1268 this->action_[0][this->grammar_.EOS.get()] = {
1269 .type = LRActionType::kAccept,
1270 };
1271
1272 this->goto_[0][&this->grammar_.root] = 0;
1273
1274 return std::nullopt;
1275 }
1276
1277 protected:
1285
1286 public:
1287 Grammar<G> const &GetGrammar() const
1288 {
1289 return this->grammar_;
1290 }
1291
1292 std::expected<ValueTokenReference<G>, Error> Parse(std::string_view input) const override
1293 {
1294 constexpr int expected_max_pr_length = 50;
1295
1296 Tokenizer tokenizer(*this, input);
1297
1298 std::vector<lrstate_id_t> stack;
1300 stack.push_back(0);
1301
1302 std::vector<std::size_t> value_stack;
1304
1305 auto store = std::make_shared<ValueTokenStore<G>>();
1306
1307 std::optional<Token<G>> lookahead = tokenizer.Next(stack.back());
1308 while(true)
1309 {
1310 if(!lookahead)
1311 {
1312 Location location = {
1313 .buffer = input,
1314 .begin = tokenizer.index,
1315 .end = tokenizer.index,
1316 };
1317 return std::unexpected(ParsingError{location, "Unexpected Token!"});
1318 }
1319
1320 auto &action = this->LookupAction(stack.back(), lookahead->terminal);
1321 if (action.type == LRActionType::kAccept)
1322 {
1323 return ValueTokenReference<G>(store->values_.size() - 1, store);
1324 }
1325 else if (action.type == LRActionType::kShift)
1326 {
1327 typename G::ValueType value = std::move(lookahead->terminal->Reason(*lookahead));
1328 store->values_.emplace_back(lookahead->raw, lookahead->location, std::move(value));
1329
1330 stack.push_back(action.state);
1331 value_stack.push_back(store->values_.size() - 1);
1332
1333 lookahead = tokenizer.Next(stack.back());
1334 }
1335 else if (action.type == LRActionType::kReduce)
1336 {
1337 auto sequence_size = action.rule->sequence_.size();
1339
1340 action.rule->Transduce(accessor);
1341
1342 stack.erase(stack.end() - sequence_size, stack.end());
1343 stack.push_back(this->LookupGoto(stack.back(), action.rule->non_terminal_));
1344
1345 value_stack.erase(value_stack.end() - sequence_size, value_stack.end());
1346 value_stack.push_back(store->values_.size() - 1);
1347 }
1348 else
1349 {
1350 return std::unexpected(ParsingError(lookahead->location, "Unexpected Token!"));
1351 }
1352 }
1353 }
1354
1355 static std::expected<SLRParser, Error> Build(NonTerminal<G> &start)
1356 {
1357 SLRParser parser(start);
1358
1359 auto error = parser.BuildParsingTables();
1360 if(error)
1361 {
1362 return std::unexpected(*error);
1363 }
1364
1365 return std::move(parser);
1366 }
1367
1371 SLRParser() = delete;
1372 };
1373}
1374
1375#endif //BUFFALO2_H
bf::GrammarDefinition< double > G
Definition calculator.cpp:8
DefineNonTerminal(ProductionRule< G > const &rule)
Definition buffalo.h:552
DefineNonTerminal(NonTerminal< G > &single_non_terminal)
Definition buffalo.h:550
DefineNonTerminal(ProductionRuleList< G > const &rule_list)
Definition buffalo.h:553
DefineNonTerminal(Terminal< G > &single_terminal)
Definition buffalo.h:549
SemanticValue & operator()(typename G::ValueType &value)
Definition buffalo.h:542
constexpr DefineTerminal(typename G::UserDataType user_data)
Definition buffalo.h:498
SemanticType & operator()(typename G::ValueType &value)
Definition buffalo.h:464
constexpr DefineTerminal(Associativity assoc=bf::None, typename G::UserDataType user_data={}, typename Terminal< G >::ReasonerType reasoner=nullptr)
Definition buffalo.h:488
constexpr DefineTerminal(Associativity assoc, typename Terminal< G >::ReasonerType reasoner)
Definition buffalo.h:496
constexpr DefineTerminal(typename G::UserDataType user_data, typename Terminal< G >::ReasonerType reasoner)
Definition buffalo.h:499
std::optional< Token< G > > Lex(std::string_view input) const override
Definition buffalo.h:469
constexpr DefineTerminal(typename Terminal< G >::ReasonerType reasoner)
Definition buffalo.h:501
Error()=default
Error(std::string message)
Definition buffalo.h:156
char const * what() const noexcept override
Definition buffalo.h:151
std::string message_
Definition buffalo.h:148
static GrammarDefinitionError ReduceReduce(LRState< G > &state, ProductionRule< G > const *a, ProductionRule< G > const *b, Terminal< G > *lookahead)
Definition buffalo.h:228
static GrammarDefinitionError ShiftReduce(LRState< G > &state, LRState< G > &shift, ProductionRule< G > const *reduce, Terminal< G > *lookahead)
Definition buffalo.h:197
GrammarDefinitionError(std::string message)
Definition buffalo.h:254
GUserDataType UserDataType
Definition buffalo.h:389
GValueType ValueType
Definition buffalo.h:388
std::map< NonTerminal< G > *, std::set< Terminal< G > * > > follow_
Definition buffalo.h:719
Grammar()=delete
std::unique_ptr< Terminal< G > > EOS
Definition buffalo.h:713
void GenerateFirstSet()
Definition buffalo.h:746
bool NonTerminalHasFollow(NonTerminal< G > &non_terminal, Terminal< G > &terminal) const
Definition buffalo.h:732
bool HasNonTerminal(NonTerminal< G > &non_terminal) const
Definition buffalo.h:727
std::set< NonTerminal< G > * > nonterminals_
Definition buffalo.h:715
Grammar(NonTerminal< G > &start)
Definition buffalo.h:880
std::vector< std::pair< NonTerminal< G > *, ProductionRule< G > > > production_rules_
List of all production rules along with their respective NonTerminal.
Definition buffalo.h:722
bool NonTerminalHasFirst(NonTerminal< G > &non_terminal, Terminal< G > &terminal) const
Definition buffalo.h:737
std::set< Terminal< G > * > terminals_
Definition buffalo.h:716
std::map< NonTerminal< G > *, std::set< Terminal< G > * > > first_
Definition buffalo.h:718
void GenerateFollowSet()
Definition buffalo.h:787
void RegisterSymbols(NonTerminal< G > *nonterminal)
Definition buffalo.h:844
NonTerminal< G > & root
Definition buffalo.h:725
std::vector< ProductionRule< G > > rules_
Definition buffalo.h:518
std::string name_
Definition buffalo.h:517
NonTerminal(NonTerminal &)=delete
char const * GetName() const noexcept
Definition buffalo.h:521
NonTerminal()=default
NonTerminal(ProductionRule< G > const &rule, std::string name="unknown")
Definition buffalo.h:531
NonTerminal(NonTerminal &&)=delete
NonTerminal(ProductionRuleList< G > const &rule_list, std::string name="unknown")
Definition buffalo.h:532
virtual std::expected< ValueTokenReference< G >, Error > Parse(std::string_view input) const =0
virtual ~Parser()=default
ParsingError(Location location, std::string_view message)
Definition buffalo.h:263
ProductionRuleList & operator|(ProductionRule< G > const &rule)
Definition buffalo.h:691
std::vector< ProductionRule< G > > rules
Definition buffalo.h:689
ProductionRule & operator<=>(typename NonTerminal< G >::TransductorType tranductor)
Definition buffalo.h:629
ProductionRule & operator+(NonTerminal< G > &rhs)
Definition buffalo.h:609
NonTerminal< G >::TransductorType transductor_
Definition buffalo.h:593
ProductionRule & operator+(Terminal< G > &rhs)
Definition buffalo.h:602
bool operator==(ProductionRule< G > const &other) const
Definition buffalo.h:636
ProductionRule(NonTerminal< G > &nonterminal)
Definition buffalo.h:673
void Transduce(ValueTokenAccessor< G > &$) const
Definition buffalo.h:659
ProductionRule(Terminal< G > &terminal)
Definition buffalo.h:672
std::vector< Symbol< G > > sequence_
Definition buffalo.h:594
std::size_t precedence
Definition buffalo.h:595
ProductionRuleModifiers< G > mod_
Definition buffalo.h:599
NonTerminal< G > * non_terminal_
Definition buffalo.h:597
ProductionRule & Short(Terminal< G > &lookahead)
Definition buffalo.h:622
Grammar< G > const & GetGrammar() const
Definition buffalo.h:1287
static std::expected< SLRParser, Error > Build(NonTerminal< G > &start)
Definition buffalo.h:1355
SLRParser(NonTerminal< G > &start)
Definition buffalo.h:1284
SLRParser()=delete
std::expected< ValueTokenReference< G >, Error > Parse(std::string_view input) const override
Definition buffalo.h:1292
Associativity associativity
Definition buffalo.h:424
virtual std::optional< Token< G > > Lex(std::string_view input) const
Definition buffalo.h:442
std::string name_
Definition buffalo.h:414
Terminal(Terminal< G > const &)=delete
Terminal(Terminal< G > &&)=delete
char const * GetName() const noexcept
Definition buffalo.h:447
G::UserDataType user_data
Definition buffalo.h:425
ReasonerType reasoner_
Definition buffalo.h:418
std::size_t precedence
Definition buffalo.h:423
Terminal()=default
static std::size_t counter_
Definition buffalo.h:416
G::ValueType Reason(Token< G > const &token) const
Definition buffalo.h:432
typename G::ValueType(*)(Token< G > const &) ReasonerType
Definition buffalo.h:411
G::ValueType & operator[](std::size_t i)
Definition buffalo.h:356
ValueTokenReference< G > operator()(std::size_t i)
Definition buffalo.h:361
ValueTokenAccessor & operator=(typename G::ValueType value)
Definition buffalo.h:366
ValueToken< G > & GetValueToken() const
Definition buffalo.h:333
G::ValueType & GetValue() const
Definition buffalo.h:338
ValueTokenReference(std::size_t index, std::shared_ptr< ValueTokenStore< G > > store)
Definition buffalo.h:343
Definition buffalo.h:22
constexpr bool always_false_v
Definition buffalo.h:28
LRActionType
Definition buffalo.h:1057
ProductionRuleList< G > operator|(ProductionRule< G > const &lhs, ProductionRule< G > const &rhs)
Definition buffalo.h:922
Associativity
Definition buffalo.h:396
@ Left
Definition buffalo.h:398
@ None
Definition buffalo.h:397
@ Right
Definition buffalo.h:399
std::variant< Terminal< G > *, NonTerminal< G > * > Symbol
Definition buffalo.h:560
ProductionRule< G > PR
Definition buffalo.h:680
ProductionRule< G > operator+(Terminal< G > &lhs, Terminal< G > &rhs)
Definition buffalo.h:898
std::string utf32_to_string(char32_t const *utf32, std::size_t size)
Definition buffalo.h:56
std::size_t lrstate_id_t
Definition buffalo.h:963
LRActionType type
Definition buffalo.h:1071
ProductionRule< G > const * rule
Definition buffalo.h:1076
lrstate_id_t state
Definition buffalo.h:1075
Symbol< G > NextSymbol() const
Definition buffalo.h:948
LRItem()=delete
LRItem(ProductionRule< G > const *rule, int position=0)
Definition buffalo.h:958
LRItem Advance() const
Definition buffalo.h:943
std::size_t position
Definition buffalo.h:936
ProductionRule< G > const * rule
Definition buffalo.h:934
ProductionRuleModifiers< G > mod
Definition buffalo.h:935
bool operator==(LRItem const &other) const
Definition buffalo.h:953
bool Complete() const
Definition buffalo.h:938
LRState()=default
LRState(NonTerminal< G > const *start)
Definition buffalo.h:1042
bool operator==(LRState const &other) const
Definition buffalo.h:1030
std::vector< LRItem< G > > GenerateClosure() const
Definition buffalo.h:974
std::vector< LRItem< G > > kernel_items
Definition buffalo.h:972
std::map< Symbol< G >, LRState< G > > GenerateTransitions() const
Definition buffalo.h:1009
std::string_view SnippetString(std::size_t padding=10) const
Definition buffalo.h:136
std::size_t begin
Definition buffalo.h:128
std::string_view buffer
Definition buffalo.h:127
std::size_t end
Definition buffalo.h:129
bool operator==(ProductionRuleModifiers const &other) const
Definition buffalo.h:567
std::set< Terminal< G > * > short_circuit
Definition buffalo.h:565
ProductionRuleModifiers & operator+=(ProductionRuleModifiers const &other)
Definition buffalo.h:572
Location location
Definition buffalo.h:283
Terminal< G > * terminal
Definition buffalo.h:281
std::size_t Size() const
Definition buffalo.h:285
std::string_view raw
Definition buffalo.h:282
Location location
Definition buffalo.h:299
std::string_view raw
Definition buffalo.h:298
G::ValueType value
Definition buffalo.h:300
constexpr void operator()(T) const
Definition buffalo.h:36