buffalo
Loading...
Searching...
No Matches
buffalo.h
Go to the documentation of this file.
1#ifndef BUFFALO2_H
2#define BUFFALO2_H
3
4#include <algorithm>
5#include <cctype>
6#include <ctll.hpp>
7#include <ctre.hpp>
8#include <expected>
9#include <format>
10#include <list>
11#include <map>
12#include <optional>
13#include <ranges>
14#include <set>
15#include <stdexcept>
16#include <variant>
17#include <vector>
18#include <sstream>
19#include <memory>
20
21namespace bf
22{
23 /*
24 * Helper for std::visit provided by Andreas Fertig.
25 * https://andreasfertig.blog/2023/07/visiting-a-stdvariant-safely/
26 */
27 template<class...>
28 constexpr bool always_false_v = false;
29
30 template<class... Ts>
31 struct overload : Ts...
32 {
33 using Ts::operator()...;
34
35 template<typename T>
36 constexpr void operator()(T) const
37 {
38 static_assert(always_false_v<T>, "Unsupported type");
39 }
40 };
41
42 template<class... Ts>
43 overload(Ts...) -> overload<Ts...>;
44
48 struct Dummy {};
49
56 [[nodiscard]] inline std::string utf32_to_string(char32_t const *utf32, std::size_t size)
57 {
58 std::string buffer(size, '0');
59
60 for (std::size_t i = 0; i < size; i++)
61 {
62 buffer[i] = static_cast<char>(utf32[i]);
63 }
64
65 return std::move(buffer);
66 }
67
68 /*
69 * GRAMMAR DEFINITION
70 */
71 template<typename T>
72 concept IGrammar = requires(T)
73 {
74 typename T::ValueType;
75 typename T::UserDataType;
76 };
77
78 /*
79 * FORWARD-DECLS
80 */
81 template<IGrammar G>
82 class Grammar;
83
84 template<IGrammar G>
86
87 template<IGrammar G>
89
90 template<IGrammar G>
91 class ProductionRule;
92
93 template<IGrammar G>
95
96 template<IGrammar G>
97 class Terminal;
98
99 template<IGrammar G, ctll::fixed_string regex, typename SemanticType>
100 class DefineTerminal;
101
102 template<IGrammar G>
103 class NonTerminal;
104
105 template<IGrammar G, ctll::fixed_string DebugName, typename SemanticType>
106 class DefineNonTerminal;
107
108 template<IGrammar G>
109 struct LRItem;
110
111 template<IGrammar G>
112 struct LRState;
113
114 template<IGrammar G>
115 class Parser;
116
117 template<IGrammar G>
118 class SLRParser;
119
125 struct Location
126 {
127 std::string_view buffer;
128 std::size_t begin;
129 std::size_t end;
130
136 [[nodiscard]] std::string_view SnippetString(std::size_t padding = 10) const
137 {
138 std::size_t start = std::max(static_cast<std::size_t>(0), this->begin - padding);
139 std::size_t n = this->end - this->begin + (padding * 2);
140
141 return buffer.substr(start, n);
142 }
143 };
144
145 class Error : public std::exception
146 {
147 protected:
148 std::string message_;
149
150 public:
151 [[nodiscard]] char const *what() const noexcept override
152 {
153 return this->message_.c_str();
154 }
155
156 Error(std::string message) : message_(std::move(message)) {}
157 Error() = default;
158 };
159
160 template<IGrammar G>
162 {
163 static void AppendRule(std::stringstream &stream, ProductionRule<G> const *rule, int dot = -1)
164 {
165 stream << "\t" << rule->non_terminal_->GetName() << " -> ";
166
167 for (int i = 0; i < rule->sequence_.size(); i++)
168 {
169 if (i == dot)
170 {
171 stream << ". ";
172 }
173
174 char const *name = std::visit(overload{
175 [](Terminal<G> *terminal)
176 {
177 return terminal->GetName();
178 },
180 {
181 return non_terminal->GetName();
182 }
183 }, rule->sequence_[i]);
184
185 stream << name << " ";
186
187 if (dot == rule->sequence_.size() && i == rule->sequence_.size() - 1)
188 {
189 stream << ".";
190 }
191 }
192
193 stream << "\n";
194 }
195
196 public:
198 {
199 char const *reduce_name = reduce->non_terminal_->GetName();
200 char const *lookahead_name = lookahead->GetName();
201
202 std::stringstream message;
203 message << "Grammar contains an irreconcilable shift-reduce conflict when deciding to reduce " << reduce_name << " or shift " << lookahead_name << ".\n";
204 message << "Shift-reduce conflicts can be solved by:\n";
205 message << "\t1. Refactoring your grammar.\n";
206 message << "\t2. Adding precedence to your terminals.\n";
207 message << "\t3. Adding Associativity to your terminals.\n\n";
208 message << "The conflict arose in the following state with the following closure:\n";
209
210 for (LRItem<G> &item : state.GenerateClosure())
211 {
212 GrammarDefinitionError::AppendRule(message, item.rule, static_cast<int>(item.position));
213 }
214
215 message << "When deciding to reduce the following rule:\n";
216 GrammarDefinitionError::AppendRule(message, reduce);
217
218 message << "Or shift " << lookahead_name << " to the following state:\n";
219
220 for (LRItem<G> &item : shift.GenerateClosure())
221 {
222 GrammarDefinitionError::AppendRule(message, item.rule, static_cast<int>(item.position));
223 }
224
225 return {message.str()};
226 }
227
229 {
230 char const *a_name = a->non_terminal_->GetName();
231 char const *b_name = b->non_terminal_->GetName();
232
233 std::stringstream message;
234 message << "Grammar contains an irreconcilable reduce-reduce conflict between " << a_name << "/" << b_name << ".\n";
235 message << "Reduce-reduce conflicts are normally solved by refactoring your grammar.\n";
236 message << "The conflict arose in the following two rules:\n\n";
237
238 for (auto const rule : {a, b})
239 {
240 GrammarDefinitionError::AppendRule(message, rule);
241 }
242
243 message << "With lookahead " << lookahead->GetName() << "\n";
244
245 message << "In the state with the following closure:\n";
246 for (LRItem<G> &item : state.GenerateClosure())
247 {
248 GrammarDefinitionError::AppendRule(message, item.rule, static_cast<int>(item.position));
249 }
250
251 return {message.str()};
252 }
253
255 GrammarDefinitionError() : Error("Unknown Grammar Definition Error") {}
256 };
257
258 class ParsingError : public Error
259 {
260 Location location_;
261
262 public:
263 ParsingError(Location location, std::string_view message) : location_(location)
264 {
265 std::stringstream mstream;
266 mstream << message << "\n";
267 mstream << "\t" << location.SnippetString(10) << "\n";
268 mstream << "\t" << std::string(10, ' ') << '^' << std::string(location.end - location.begin, '~') << "\n";
269
270 this->message_ = mstream.str();
271 }
272 };
273
278 template<IGrammar G>
279 struct Token
280 {
282 std::string_view raw;
284
285 std::size_t Size() const
286 {
287 return this->location.end - this->location.begin;
288 }
289 };
290
295 template<IGrammar G>
297 {
298 std::string_view raw;
301
302 [[nodiscard]] operator typename G::ValueType()
303 {
304 return this->value;
305 }
306
307 ValueToken(std::string_view raw, Location location, typename G::ValueType &&value) : raw(raw), location(location), value(std::move(value)) {}
308 };
309
310 template<IGrammar G>
312 {
313 friend class ValueTokenReference<G>;
314 friend class ValueTokenAccessor<G>;
315 friend class SLRParser<G>;
316
317 std::vector<ValueToken<G>> values_;
318
319 public:
321 {
322 this->values_.reserve(100);
323 }
324 };
325
326 template<IGrammar G>
328 {
329 std::size_t index_;
330 std::shared_ptr<ValueTokenStore<G>> store_;
331
332 public:
334 {
335 return this->store_->values_[this->index_];
336 }
337
338 [[nodiscard]] typename G::ValueType &GetValue() const
339 {
340 return this->store_->values_[this->index_].value;
341 }
342
343 ValueTokenReference(std::size_t index, std::shared_ptr<ValueTokenStore<G>> store) : index_(index), store_(store) {}
344 };
345
346 template<IGrammar G>
348 {
349 std::span<std::size_t> indices_;
350 std::shared_ptr<ValueTokenStore<G>> store_;
351
352 std::string_view raw_;
353 Location location_;
354
355 public:
356 [[nodiscard]] typename G::ValueType &operator[](std::size_t i)
357 {
358 return this->store_->values_[this->indices_[i]].value;
359 }
360
362 {
363 return {this->indices_[i], this->store_};
364 }
365
367 {
368 this->store_->values_.emplace_back(this->raw_, this->location_, std::move(value));
369
370 return *this;
371 }
372
373 ValueTokenAccessor(std::span<std::size_t> indices, std::shared_ptr<ValueTokenStore<G>> store) : indices_(indices), store_(store) {}
374 };
375
380 template<typename GValueType, typename GUserDataType = Dummy>
381 requires std::constructible_from<GValueType>
383 {
384 public:
385 /*
386 * TYPES
387 */
390 };
391
401
405 template<IGrammar G>
407 {
408 friend class Grammar<G>;
409
410 public:
411 using ReasonerType = typename G::ValueType(*)(Token<G> const&);
412
413 protected:
414 std::string name_ = "\"UNKNOWN\"";
415
416 inline static std::size_t counter_ = 0;
417
419
420 Terminal() = default;
421
422 public:
426
432 typename G::ValueType Reason(Token<G> const &token) const
433 {
434 if(this->reasoner_)
435 {
436 return this->reasoner_(token);
437 }
438
439 return {};
440 }
441
442 virtual std::optional<Token<G>> Lex(std::string_view input) const
443 {
444 return std::nullopt;
445 }
446
447 [[nodiscard]] char const *GetName() const noexcept
448 {
449 return this->name_.c_str();
450 }
451
452 Terminal(Terminal<G> &&) = delete;
453 Terminal(Terminal<G> const &) = delete;
454 };
455
460 template<IGrammar G, ctll::fixed_string regex, typename SemanticType = Dummy>
461 class DefineTerminal : public Terminal<G>
462 {
463 public:
465 {
466 return std::get<SemanticType>(value);
467 }
468
469 std::optional<Token<G>> Lex(std::string_view input) const override
470 {
471 auto match = ctre::starts_with<regex>(input);
472
473 if(!match)
474 {
475 return std::nullopt;
476 }
477
478 return Token<G> {
479 .terminal = (Terminal<G>*)this,
480 .raw = match.view(),
481 .location = {
482 .begin = 0,
483 .end = match.size(),
484 },
485 };
486 }
487
489 {
490 this->name_ = std::format("\"{}\"", utf32_to_string(regex.content, regex.size()));
491 this->associativity = assoc;
492 this->user_data = user_data;
493 this->reasoner_ = reasoner;
494 }
495
497
500
502 };
503
507 template<IGrammar G>
509 {
510 friend class Grammar<G>;
511 friend struct LRState<G>;
512
513 public:
515
516 protected:
517 std::string name_ = "unknown";
518 std::vector<ProductionRule<G>> rules_;
519
520 public:
521 [[nodiscard]] char const *GetName() const noexcept
522 {
523 return this->name_.c_str();
524 }
525
528
529 NonTerminal() = default;
530
531 NonTerminal(ProductionRule<G> const &rule, std::string name = "unknown") : rules_({rule}), name_(std::move(name)) {}
532 NonTerminal(ProductionRuleList<G> const &rule_list, std::string name = "unknown") : rules_(rule_list.rules), name_(std::move(name)) {}
533 };
534
538 template<IGrammar G, ctll::fixed_string DebugName = "Generic", typename SemanticValue = Dummy>
555
559 template<IGrammar G>
560 using Symbol = std::variant<Terminal<G>*, NonTerminal<G>*>;
561
562 /*
563 * PRODUCTION RULE MODIFIERS
564 */
565
570 template<IGrammar G>
571 struct Short
572 {
573 std::set<Terminal<G> *> terminals;
574
575 Short(Terminal<G> *terminal) : terminals(terminal) {}
577 };
578
582 struct Unproductive {};
583
584 /*
585 * PRODUCTION RULES
586 */
587 template<IGrammar G>
589 {
590 friend class Grammar<G>;
591 friend struct LRItem<G>;
592 friend class Parser<G>;
593 friend class SLRParser<G>;
594 friend class GrammarDefinitionError<G>;
595
596 protected:
598 std::vector<Symbol<G>> sequence_;
599 std::size_t precedence = -1;
600
602
603 std::set<Terminal<G>*> short_circuit_;
604
605 struct {} attr_;
606
607 public:
609 {
610 this->sequence_.push_back(&rhs);
611
612 return *this;
613 }
614
616 {
617 this->sequence_.push_back(&rhs);
618
619 return *this;
620 }
621
629 {
630 this->mod_.short_circuit.insert(&lookahead);
631
632 return *this;
633 }
634
636 {
637 this->transductor_ = tranductor;
638
639 return *this;
640 }
641
643 {
644 // NonTerminal
645 if(this->non_terminal_ != other.non_terminal_)
646 {
647 return false;
648 }
649
650 if (this->mod_ != other.mod_)
651 {
652 return false;
653 }
654
655 // Sequence
656 if(this->sequence_.size() != other.sequence_.size()) return false;
657 for(auto const &[first, second] : std::views::zip(this->sequence_, other.sequence_))
658 {
659 if(first != second) return false;
660 }
661
662 return true;
663 }
664
666 {
667 if(this->transductor_)
668 {
669 this->transductor_($);
670 }
671 // If no transductor is defined, return the first element in sequence.
672 else if (this->sequence_.size() > 0)
673 {
674 $ = $[0];
675 }
676 }
677
678 ProductionRule(Terminal<G> &terminal) : sequence_({ &terminal }) {}
680 };
681
685 template<IGrammar G>
687
692 template<IGrammar G>
694 {
695 std::vector<ProductionRule<G>> rules;
696
698 {
699 this->rules.push_back(rule);
700
701 return *this;
702 }
703 };
704
705 /*
706 * GRAMMAR
707 */
708 template<IGrammar G>
710 {
711 friend class Parser<G>;
712 friend class SLRParser<G>;
713
714 protected:
719 std::unique_ptr<Terminal<G>> EOS;
720
721 std::set<NonTerminal<G>*> nonterminals_;
722 std::set<Terminal<G>*> terminals_;
723
724 std::map<NonTerminal<G>*, std::set<Terminal<G>*>> first_;
725 std::map<NonTerminal<G>*, std::set<Terminal<G>*>> follow_;
726
728 std::vector<std::pair<NonTerminal<G>*, ProductionRule<G>>> production_rules_;
729
730 public:
732
734 {
735 return this->nonterminals_.contains(&non_terminal);
736 }
737
739 {
740 return this->follow_.at(&non_terminal).contains(&terminal);
741 }
742
744 {
745 return this->first_.at(&non_terminal).contains(&terminal);
746 }
747
753 {
754 bool has_change;
755 do
756 {
757 has_change = false;
758
759 for(auto const &[nonterminal, rule] : this->production_rules_)
760 {
761 if(rule.sequence_.empty()) continue;
762
763 has_change |= std::visit(overload{
764 [&](Terminal<G> *terminal)
765 {
766 auto [it, inserted] = this->first_[nonterminal].insert(terminal);
767 return inserted;
768 },
770 {
772 {
773 return false;
774 }
775
776 auto &parent_first = this->first_[nonterminal];
777 auto &child_first = this->first_[child_nonterminal];
778
779 std::size_t parent_size = parent_first.size();
780 parent_first.insert(child_first.begin(), child_first.end());
781
782 return parent_size != parent_first.size();
783 }
784 }, rule.sequence_[0]);
785 }
786 } while(has_change);
787 }
788
794 {
795 this->follow_[&root] = { this->EOS.get() };
796
797 bool has_change;
798 do
799 {
800 has_change = false;
801
802 for(auto &[nonterminal, rule] : this->production_rules_)
803 {
804 for(int i = 0; i < rule.sequence_.size(); i++)
805 {
806 // Skip over Terminals
807 if(std::holds_alternative<Terminal<G>*>(rule.sequence_[i])) continue;
808
809 // Process NonTerminal
810 auto symbol = std::get<NonTerminal<G>*>(rule.sequence_[i]);
811
812 // If this is the last NonTerminal in the sequence, then it gets all of the FOLLOW of parent.
813 if(i == rule.sequence_.size() - 1)
814 {
815 auto &parent_follow = this->follow_[nonterminal];
816 auto &child_follow = this->follow_[symbol];
817
818 std::size_t child_size = child_follow.size();
819 child_follow.insert(parent_follow.begin(), parent_follow.end());
820
822 continue;
823 }
824
825 // ELSE process the next token
826 auto follow = rule.sequence_[i + 1];
827
828 has_change |= std::visit(overload{
829 [&](Terminal<G> *terminal)
830 {
831 auto [it, inserted] = this->follow_[symbol].insert(terminal);
832 return inserted;
833 },
835 {
836 auto &symbol_follow = this->follow_[symbol];
837 auto &follow_first = this->first_[follow_nonterminal];
838
839 std::size_t symbol_size = symbol_follow.size();
840 symbol_follow.insert(follow_first.begin(), follow_first.end());
841
842 return symbol_size != symbol_follow.size();
843 }
844 }, follow);
845 }
846 }
847 } while(has_change);
848 }
849
851 {
852 this->nonterminals_.insert(nonterminal);
853
854 for(auto &rule : nonterminal->rules_)
855 {
856 rule.non_terminal_ = nonterminal;
857 Terminal<G> *last_terminal = nullptr;
858
859 this->production_rules_.push_back({nonterminal, rule});
860
861 for(auto &symbol : rule.sequence_)
862 {
863 std::visit(overload{
864 [&](Terminal<G> *terminal)
865 {
866 last_terminal = terminal;
867 this->terminals_.insert(terminal);
868 },
870 {
871 if(this->nonterminals_.contains(child_nonterminal)) return;
872
874 }
875 }, symbol);
876 }
877
878 // Rule precedence defaults to the precedence of the LAST terminal in sequence.
879 if(last_terminal)
880 {
881 rule.precedence = last_terminal->precedence;
882 }
883 }
884 }
885
887 {
888 this->EOS = std::make_unique<DefineTerminal<G, R"(\Z)">>();
889 this->terminals_.insert(this->EOS.get());
890
891 this->RegisterSymbols(&start);
892
893 this->GenerateFirstSet();
894 this->GenerateFollowSet();
895 }
896
897 Grammar() = delete;
898 };
899
900 /*
901 * PRODUCTION RULE COMPOSITION FUNCTIONS
902 */
903 template<IGrammar G>
908
909 template<IGrammar G>
914
915 template<IGrammar G>
920
921 template<IGrammar G>
926
927 template<IGrammar G>
932
937 template<IGrammar G>
938 struct LRItem
939 {
942 std::size_t position;
943
944 [[nodiscard]] bool Complete() const
945 {
946 return this->position >= this->rule->sequence_.size();
947 }
948
950 {
951 return LRItem(this->rule, this->position + 1);
952 }
953
955 {
956 return this->rule->sequence_[this->position];
957 }
958
959 bool operator==(LRItem const &other) const
960 {
961 return *this->rule == *other.rule && this->mod == other.mod && this->position == other.position;
962 }
963
965
966 LRItem() = delete;
967 };
968
969 using lrstate_id_t = std::size_t;
970
975 template<IGrammar G>
976 struct LRState
977 {
978 std::vector<LRItem<G>> kernel_items;
979
980 std::vector<LRItem<G>> GenerateClosure() const
981 {
982 std::vector<LRItem<G>> closure = this->kernel_items;
983 std::set<NonTerminal<G>*> closed_nonterminals;
984
985 for(int i = 0; i < closure.size(); i++)
986 {
987 if(closure[i].Complete()) continue;
988
989 std::visit(overload{
990 [&](Terminal<G> *terminal)
991 {
992 // Remove this rule if it is present it item's short-circuit list
993 if (closure[i].mod.short_circuit.contains(terminal))
994 {
995 closure.erase(closure.begin() + (i--));
996 }
997 },
999 {
1000 if(closed_nonterminals.contains(non_terminal)) return;
1002
1003 for(auto const &rule : non_terminal->rules_)
1004 {
1005 auto &new_item = closure.emplace_back(&rule);
1006 new_item.mod += closure[i].mod;
1007 }
1008 }
1009 }, closure[i].NextSymbol());
1010 }
1011
1012 return closure;
1013 }
1014
1015 std::map<Symbol<G>, LRState<G>> GenerateTransitions() const
1016 {
1017 std::map<Symbol<G>, LRState<G>> transitions;
1018
1019 auto closure = this->GenerateClosure();
1020
1021 for(auto const &item : closure)
1022 {
1023 if (item.Complete()) continue;
1024
1025 transitions[item.NextSymbol()].kernel_items.push_back(item.Advance());
1026 }
1027
1028 return transitions;
1029 }
1030
1036 bool operator==(LRState const &other) const
1037 {
1038 if(this->kernel_items.size() != other.kernel_items.size()) return false;
1039
1040 for(int i = 0; i < this->kernel_items.size(); i++)
1041 {
1042 if(this->kernel_items[i] != other.kernel_items[i]) return false;
1043 }
1044
1045 return true;
1046 }
1047
1049 {
1050 for(auto const &rule : start->rules_)
1051 {
1052 this->kernel_items.emplace_back(&rule);
1053 }
1054 }
1055
1056 LRState() = default;
1057 };
1058
1062 enum class LRActionType
1063 {
1064 kError,
1065 kAccept,
1066 kShift,
1067 kReduce,
1068 };
1069
1074 template<IGrammar G>
1076 {
1078
1079 union
1080 {
1083 };
1084 };
1085
1090 template<IGrammar G>
1092 {
1093 public:
1094 virtual std::expected<ValueTokenReference<G>, Error> Parse(std::string_view input) const = 0;
1095
1096 virtual ~Parser() = default;
1097 };
1098
1103 template<IGrammar G>
1104 class SLRParser final : public Parser<G>
1105 {
1106 Grammar<G> grammar_;
1107
1108 std::map<lrstate_id_t, std::map<Terminal<G>*, LRAction<G>>> action_;
1109 std::map<lrstate_id_t, std::map<NonTerminal<G>*, lrstate_id_t>> goto_;
1110
1111 [[nodiscard]] LRAction<G> const &LookupAction(lrstate_id_t state, Terminal<G> *lookahead) const
1112 {
1113 return this->action_.at(state).at(lookahead);
1114 }
1115
1116 [[nodiscard]] lrstate_id_t LookupGoto(lrstate_id_t state, NonTerminal<G> *non_terminal) const
1117 {
1118 return this->goto_.at(state).at(non_terminal);
1119 }
1120
1121 struct Tokenizer
1122 {
1123 SLRParser<G> const &parser;
1124 std::string_view input;
1125 std::size_t index = 0;
1126
1127 std::optional<Token<G>> Next(lrstate_id_t state = 0)
1128 {
1129 while(this->index < this->input.size() && std::isspace(this->input[this->index])) this->index++;
1130
1131 for(auto terminal : this->parser.action_.at(state) | std::views::keys)
1132 {
1133 auto token = terminal->Lex(this->input.substr(this->index));
1134 if(token)
1135 {
1136 token->location.begin += this->index;
1137 token->location.end += this->index;
1138
1139 this->index += token->Size();
1140
1141 return token;
1142 }
1143 }
1144
1145 return std::nullopt;
1146 }
1147
1148 Tokenizer(SLRParser<G> const &parser, std::string_view input) : parser(parser), input(input) {}
1149 };
1150
1157 lrstate_id_t FindOrInsertLRState(std::vector<LRState<G>> &state_list, LRState<G> const &state)
1158 {
1159 auto it = std::ranges::find(state_list, state);
1160
1161 // Does not exist, add it in.
1162 if(it == state_list.end())
1163 {
1164 state_list.push_back(state);
1165 return state_list.size() - 1;
1166 }
1167
1168 // Otherwise return index.
1169 return std::distance(state_list.begin(), it);
1170 }
1171
1175 std::optional<Error> BuildParsingTables()
1176 {
1177 std::vector<LRState<G>> states{};
1178 states.emplace_back(&this->grammar_.root);
1179
1180 // Finite State Machine
1181 std::map<lrstate_id_t, std::map<Symbol<G>, lrstate_id_t>> fsm;
1182
1183 for(lrstate_id_t i = 0; i < states.size(); i++)
1184 {
1185 // Process all transitions
1186 for(auto const &[symbol, new_state] : states[i].GenerateTransitions())
1187 {
1188 lrstate_id_t new_state_id = this->FindOrInsertLRState(states, new_state);
1190
1191 // Create SHIFT/GOTO entries in parsing tables
1192 std::visit(overload{
1193 // Create ACTION
1194 [&](Terminal<G> *terminal)
1195 {
1196 this->action_[i][terminal] = {
1197 .type = LRActionType::kShift,
1198 .state = new_state_id,
1199 };
1200 },
1201
1202 // Create GOTO
1204 {
1205 this->goto_[i][non_terminal] = new_state_id;
1206 },
1207 }, symbol);
1208 }
1209
1210 // Create REDUCE/ACCEPT entries in parsing tables
1211 for(auto const &item : states[i].kernel_items)
1212 {
1213 if(!item.Complete()) continue;
1214
1215 for(auto follow_terminal : this->grammar_.follow_[item.rule->non_terminal_])
1216 {
1217 // There will be no conflict
1218 if (!this->action_[i].contains(follow_terminal))
1219 {
1220 this->action_[i][follow_terminal] = {
1221 .type = LRActionType::kReduce,
1222 .rule = item.rule,
1223 };
1224 continue;
1225 }
1226
1227 // There will be conflict (FIGHT!)
1228 LRAction<G> conflict = this->action_[i][follow_terminal];
1229 if (conflict.type == LRActionType::kShift) // SHIFT-REDUCE
1230 {
1231 // Reduce due to higher precedence
1232 if(item.rule->precedence < follow_terminal->precedence)
1233 {
1234 this->action_[i][follow_terminal] = {
1236 .rule = item.rule,
1237 };
1238 continue;
1239 }
1240
1241 // Shift due to lower precedence
1242 if(item.rule->precedence > follow_terminal->precedence)
1243 {
1244 continue;
1245 }
1246
1247 // Reduce due to associativity rule
1248 if(follow_terminal->associativity == Associativity::Left)
1249 {
1250 this->action_[i][follow_terminal] = {
1251 .type = LRActionType::kReduce,
1252 .rule = item.rule,
1253 };
1254 continue;
1255 }
1256
1257 // Shift due to associativity rule
1258 if(follow_terminal->associativity == Right)
1259 {
1260 continue;
1261 }
1262
1263 // Unable to resolve conflict
1265 }
1266 else if (conflict.type == LRActionType::kReduce) // REDUCE-REDUCE
1267 {
1269 }
1270 }
1271 }
1272 }
1273
1274 this->action_[0][this->grammar_.EOS.get()] = {
1275 .type = LRActionType::kAccept,
1276 };
1277
1278 this->goto_[0][&this->grammar_.root] = 0;
1279
1280 return std::nullopt;
1281 }
1282
1283 protected:
1291
1292 public:
1293 Grammar<G> const &GetGrammar() const
1294 {
1295 return this->grammar_;
1296 }
1297
1298 std::expected<ValueTokenReference<G>, Error> Parse(std::string_view input) const override
1299 {
1300 constexpr int expected_max_pr_length = 50;
1301
1302 Tokenizer tokenizer(*this, input);
1303
1304 std::vector<lrstate_id_t> stack;
1306 stack.push_back(0);
1307
1308 std::vector<std::size_t> value_stack;
1310
1311 auto store = std::make_shared<ValueTokenStore<G>>();
1312
1313 std::optional<Token<G>> lookahead = tokenizer.Next(stack.back());
1314 while(true)
1315 {
1316 if(!lookahead)
1317 {
1318 Location location = {
1319 .buffer = input,
1320 .begin = tokenizer.index,
1321 .end = tokenizer.index,
1322 };
1323 return std::unexpected(ParsingError{location, "Unexpected Token!"});
1324 }
1325
1326 auto &action = this->LookupAction(stack.back(), lookahead->terminal);
1327 if (action.type == LRActionType::kAccept)
1328 {
1329 return ValueTokenReference<G>(store->values_.size() - 1, store);
1330 }
1331 else if (action.type == LRActionType::kShift)
1332 {
1333 typename G::ValueType value = std::move(lookahead->terminal->Reason(*lookahead));
1334 store->values_.emplace_back(lookahead->raw, lookahead->location, std::move(value));
1335
1336 stack.push_back(action.state);
1337 value_stack.push_back(store->values_.size() - 1);
1338
1339 lookahead = tokenizer.Next(stack.back());
1340 }
1341 else if (action.type == LRActionType::kReduce)
1342 {
1343 auto sequence_size = action.rule->sequence_.size();
1345
1346 action.rule->Transduce(accessor);
1347
1348 stack.erase(stack.end() - sequence_size, stack.end());
1349 stack.push_back(this->LookupGoto(stack.back(), action.rule->non_terminal_));
1350
1351 value_stack.erase(value_stack.end() - sequence_size, value_stack.end());
1352 value_stack.push_back(store->values_.size() - 1);
1353 }
1354 else
1355 {
1356 return std::unexpected(ParsingError(lookahead->location, "Unexpected Token!"));
1357 }
1358 }
1359 }
1360
1361 static std::expected<SLRParser, Error> Build(NonTerminal<G> &start)
1362 {
1363 SLRParser parser(start);
1364
1365 auto error = parser.BuildParsingTables();
1366 if(error)
1367 {
1368 return std::unexpected(*error);
1369 }
1370
1371 return std::move(parser);
1372 }
1373
1377 SLRParser() = delete;
1378 };
1379}
1380
1381#endif //BUFFALO2_H
bf::test::CalculatorG G
Definition calculator.cpp:8
DefineNonTerminal(ProductionRule< G > const &rule)
Definition buffalo.h:552
DefineNonTerminal(NonTerminal< G > &single_non_terminal)
Definition buffalo.h:550
DefineNonTerminal(ProductionRuleList< G > const &rule_list)
Definition buffalo.h:553
DefineNonTerminal(Terminal< G > &single_terminal)
Definition buffalo.h:549
SemanticValue & operator()(typename G::ValueType &value)
Definition buffalo.h:542
constexpr DefineTerminal(typename G::UserDataType user_data)
Definition buffalo.h:498
SemanticType & operator()(typename G::ValueType &value)
Definition buffalo.h:464
constexpr DefineTerminal(Associativity assoc=bf::None, typename G::UserDataType user_data={}, typename Terminal< G >::ReasonerType reasoner=nullptr)
Definition buffalo.h:488
constexpr DefineTerminal(Associativity assoc, typename Terminal< G >::ReasonerType reasoner)
Definition buffalo.h:496
constexpr DefineTerminal(typename G::UserDataType user_data, typename Terminal< G >::ReasonerType reasoner)
Definition buffalo.h:499
std::optional< Token< G > > Lex(std::string_view input) const override
Definition buffalo.h:469
constexpr DefineTerminal(typename Terminal< G >::ReasonerType reasoner)
Definition buffalo.h:501
Error()=default
Error(std::string message)
Definition buffalo.h:156
char const * what() const noexcept override
Definition buffalo.h:151
std::string message_
Definition buffalo.h:148
static GrammarDefinitionError ReduceReduce(LRState< G > &state, ProductionRule< G > const *a, ProductionRule< G > const *b, Terminal< G > *lookahead)
Definition buffalo.h:228
static GrammarDefinitionError ShiftReduce(LRState< G > &state, LRState< G > &shift, ProductionRule< G > const *reduce, Terminal< G > *lookahead)
Definition buffalo.h:197
GrammarDefinitionError(std::string message)
Definition buffalo.h:254
GUserDataType UserDataType
Definition buffalo.h:389
GValueType ValueType
Definition buffalo.h:388
std::map< NonTerminal< G > *, std::set< Terminal< G > * > > follow_
Definition buffalo.h:725
Grammar()=delete
std::unique_ptr< Terminal< G > > EOS
Definition buffalo.h:719
void GenerateFirstSet()
Definition buffalo.h:752
bool NonTerminalHasFollow(NonTerminal< G > &non_terminal, Terminal< G > &terminal) const
Definition buffalo.h:738
bool HasNonTerminal(NonTerminal< G > &non_terminal) const
Definition buffalo.h:733
std::set< NonTerminal< G > * > nonterminals_
Definition buffalo.h:721
Grammar(NonTerminal< G > &start)
Definition buffalo.h:886
std::vector< std::pair< NonTerminal< G > *, ProductionRule< G > > > production_rules_
List of all production rules along with their respective NonTerminal.
Definition buffalo.h:728
bool NonTerminalHasFirst(NonTerminal< G > &non_terminal, Terminal< G > &terminal) const
Definition buffalo.h:743
std::set< Terminal< G > * > terminals_
Definition buffalo.h:722
std::map< NonTerminal< G > *, std::set< Terminal< G > * > > first_
Definition buffalo.h:724
void GenerateFollowSet()
Definition buffalo.h:793
void RegisterSymbols(NonTerminal< G > *nonterminal)
Definition buffalo.h:850
NonTerminal< G > & root
Definition buffalo.h:731
std::vector< ProductionRule< G > > rules_
Definition buffalo.h:518
std::string name_
Definition buffalo.h:517
NonTerminal(NonTerminal &)=delete
char const * GetName() const noexcept
Definition buffalo.h:521
NonTerminal()=default
NonTerminal(ProductionRule< G > const &rule, std::string name="unknown")
Definition buffalo.h:531
void(*)(ValueTokenAccessor< G > &) TransductorType
Definition buffalo.h:514
NonTerminal(NonTerminal &&)=delete
NonTerminal(ProductionRuleList< G > const &rule_list, std::string name="unknown")
Definition buffalo.h:532
virtual std::expected< ValueTokenReference< G >, Error > Parse(std::string_view input) const =0
virtual ~Parser()=default
ParsingError(Location location, std::string_view message)
Definition buffalo.h:263
ProductionRuleList & operator|(ProductionRule< G > const &rule)
Definition buffalo.h:697
std::vector< ProductionRule< G > > rules
Definition buffalo.h:695
ProductionRule & operator<=>(typename NonTerminal< G >::TransductorType tranductor)
Definition buffalo.h:635
ProductionRule & operator+(NonTerminal< G > &rhs)
Definition buffalo.h:615
NonTerminal< G >::TransductorType transductor_
Definition buffalo.h:597
std::set< Terminal< G > * > short_circuit_
Definition buffalo.h:603
ProductionRule & operator+(Terminal< G > &rhs)
Definition buffalo.h:608
bool operator==(ProductionRule< G > const &other) const
Definition buffalo.h:642
ProductionRule(NonTerminal< G > &nonterminal)
Definition buffalo.h:679
void Transduce(ValueTokenAccessor< G > &$) const
Definition buffalo.h:665
ProductionRule(Terminal< G > &terminal)
Definition buffalo.h:678
std::vector< Symbol< G > > sequence_
Definition buffalo.h:598
std::size_t precedence
Definition buffalo.h:599
NonTerminal< G > * non_terminal_
Definition buffalo.h:601
struct bf::ProductionRule::@0 attr_
ProductionRule & Short(Terminal< G > &lookahead)
Definition buffalo.h:628
Grammar< G > const & GetGrammar() const
Definition buffalo.h:1293
static std::expected< SLRParser, Error > Build(NonTerminal< G > &start)
Definition buffalo.h:1361
SLRParser(NonTerminal< G > &start)
Definition buffalo.h:1290
SLRParser()=delete
std::expected< ValueTokenReference< G >, Error > Parse(std::string_view input) const override
Definition buffalo.h:1298
Associativity associativity
Definition buffalo.h:424
virtual std::optional< Token< G > > Lex(std::string_view input) const
Definition buffalo.h:442
std::string name_
Definition buffalo.h:414
Terminal(Terminal< G > const &)=delete
Terminal(Terminal< G > &&)=delete
char const * GetName() const noexcept
Definition buffalo.h:447
G::UserDataType user_data
Definition buffalo.h:425
ReasonerType reasoner_
Definition buffalo.h:418
std::size_t precedence
Definition buffalo.h:423
Terminal()=default
static std::size_t counter_
Definition buffalo.h:416
G::ValueType Reason(Token< G > const &token) const
Definition buffalo.h:432
typename G::ValueType(*)(Token< G > const &) ReasonerType
Definition buffalo.h:411
G::ValueType & operator[](std::size_t i)
Definition buffalo.h:356
ValueTokenReference< G > operator()(std::size_t i)
Definition buffalo.h:361
ValueTokenAccessor & operator=(typename G::ValueType value)
Definition buffalo.h:366
ValueToken< G > & GetValueToken() const
Definition buffalo.h:333
G::ValueType & GetValue() const
Definition buffalo.h:338
ValueTokenReference(std::size_t index, std::shared_ptr< ValueTokenStore< G > > store)
Definition buffalo.h:343
Definition buffalo.h:22
constexpr bool always_false_v
Definition buffalo.h:28
LRActionType
Definition buffalo.h:1063
ProductionRuleList< G > operator|(ProductionRule< G > const &lhs, ProductionRule< G > const &rhs)
Definition buffalo.h:928
Associativity
Definition buffalo.h:396
@ Left
Definition buffalo.h:398
@ None
Definition buffalo.h:397
@ Right
Definition buffalo.h:399
std::variant< Terminal< G > *, NonTerminal< G > * > Symbol
Definition buffalo.h:560
ProductionRule< G > operator+(Terminal< G > &lhs, Terminal< G > &rhs)
Definition buffalo.h:904
std::string utf32_to_string(char32_t const *utf32, std::size_t size)
Definition buffalo.h:56
std::size_t lrstate_id_t
Definition buffalo.h:969
LRActionType type
Definition buffalo.h:1077
ProductionRule< G > const * rule
Definition buffalo.h:1082
lrstate_id_t state
Definition buffalo.h:1081
Symbol< G > NextSymbol() const
Definition buffalo.h:954
LRItem()=delete
LRItem(ProductionRule< G > const *rule, int position=0)
Definition buffalo.h:964
LRItem Advance() const
Definition buffalo.h:949
std::size_t position
Definition buffalo.h:942
ProductionRule< G > const * rule
Definition buffalo.h:940
ProductionRuleModifiers< G > mod
Definition buffalo.h:941
bool operator==(LRItem const &other) const
Definition buffalo.h:959
bool Complete() const
Definition buffalo.h:944
LRState()=default
LRState(NonTerminal< G > const *start)
Definition buffalo.h:1048
bool operator==(LRState const &other) const
Definition buffalo.h:1036
std::vector< LRItem< G > > GenerateClosure() const
Definition buffalo.h:980
std::vector< LRItem< G > > kernel_items
Definition buffalo.h:978
std::map< Symbol< G >, LRState< G > > GenerateTransitions() const
Definition buffalo.h:1015
std::string_view SnippetString(std::size_t padding=10) const
Definition buffalo.h:136
std::size_t begin
Definition buffalo.h:128
std::string_view buffer
Definition buffalo.h:127
std::size_t end
Definition buffalo.h:129
Short(Terminal< G > *terminal)
Definition buffalo.h:575
std::set< Terminal< G > * > terminals
Definition buffalo.h:573
Location location
Definition buffalo.h:283
Terminal< G > * terminal
Definition buffalo.h:281
std::size_t Size() const
Definition buffalo.h:285
std::string_view raw
Definition buffalo.h:282
Location location
Definition buffalo.h:299
std::string_view raw
Definition buffalo.h:298
G::ValueType value
Definition buffalo.h:300
constexpr void operator()(T) const
Definition buffalo.h:36