2727
2828#include < cstddef>
2929#include < cstdint>
30- #include < deque>
3130#include < list>
3231#include < memory>
3332#include < numeric>
@@ -831,23 +830,6 @@ namespace {
831830// / This class provides support for reading attribute and type entries from the
832831// / bytecode. Attribute and Type entries are read lazily on demand, so we use
833832// / this reader to manage when to actually parse them from the bytecode.
834- // /
835- // / The parsing of attributes & types are generally recursive, this can lead to
836- // / stack overflows for deeply nested structures, so we track a few extra pieces
837- // / of information to avoid this:
838- // /
839- // / - `depth`: The current depth while parsing nested attributes. We defer on
840- // / parsing deeply nested attributes to avoid potential stack overflows. The
841- // / deferred parsing is achieved by reporting a failure when parsing a nested
842- // / attribute/type and registering the index of the encountered attribute/type
843- // / in the deferred parsing worklist. Hence, a failure with deffered entry
844- // / does not constitute a failure, it also requires that folks return on
845- // / first failure rather than attempting additional parses.
846- // / - `deferredWorklist`: A list of attribute/type indices that we could not
847- // / parse due to hitting the depth limit. The worklist is used to capture the
848- // / indices of attributes/types that need to be parsed/reparsed when we hit
849- // / the depth limit. This enables moving the tracking of what needs to be
850- // / parsed to the heap.
851833class AttrTypeReader {
852834 // / This class represents a single attribute or type entry.
853835 template <typename T>
@@ -881,34 +863,12 @@ class AttrTypeReader {
881863 ArrayRef<uint8_t > sectionData,
882864 ArrayRef<uint8_t > offsetSectionData);
883865
884- LogicalResult readAttribute (uint64_t index, Attribute &result,
885- uint64_t depth = 0 ) {
886- return readEntry (attributes, index, result, " attribute" , depth);
887- }
888-
889- LogicalResult readType (uint64_t index, Type &result, uint64_t depth = 0 ) {
890- return readEntry (types, index, result, " type" , depth);
891- }
892-
893866 // / Resolve the attribute or type at the given index. Returns nullptr on
894867 // / failure.
895- Attribute resolveAttribute (size_t index, uint64_t depth = 0 ) {
896- return resolveEntry (attributes, index, " Attribute" , depth);
897- }
898- Type resolveType (size_t index, uint64_t depth = 0 ) {
899- return resolveEntry (types, index, " Type" , depth);
900- }
901-
902- Attribute getAttributeOrSentinel (size_t index) {
903- if (index >= attributes.size ())
904- return nullptr ;
905- return attributes[index].entry ;
906- }
907- Type getTypeOrSentinel (size_t index) {
908- if (index >= types.size ())
909- return nullptr ;
910- return types[index].entry ;
868+ Attribute resolveAttribute (size_t index) {
869+ return resolveEntry (attributes, index, " Attribute" );
911870 }
871+ Type resolveType (size_t index) { return resolveEntry (types, index, " Type" ); }
912872
913873 // / Parse a reference to an attribute or type using the given reader.
914874 LogicalResult parseAttribute (EncodingReader &reader, Attribute &result) {
@@ -949,33 +909,23 @@ class AttrTypeReader {
949909 llvm::getTypeName<T>(), " , but got: " , baseResult);
950910 }
951911
952- // / Add an index to the deferred worklist for re-parsing.
953- void addDeferredParsing (uint64_t index) { deferredWorklist.push_back (index); }
954-
955912private:
956913 // / Resolve the given entry at `index`.
957914 template <typename T>
958- T resolveEntry (SmallVectorImpl<Entry<T>> &entries, uint64_t index,
959- StringRef entryType, uint64_t depth = 0 );
915+ T resolveEntry (SmallVectorImpl<Entry<T>> &entries, size_t index,
916+ StringRef entryType);
960917
961- // / Read the entry at the given index, returning failure if the entry is not
962- // / yet resolved .
918+ // / Parse an entry using the given reader that was encoded using the textual
919+ // / assembly format .
963920 template <typename T>
964- LogicalResult readEntry (SmallVectorImpl<Entry<T>> &entries, uint64_t index ,
965- T &result, StringRef entryType, uint64_t depth );
921+ LogicalResult parseAsmEntry (T &result, EncodingReader &reader ,
922+ StringRef entryType );
966923
967924 // / Parse an entry using the given reader that was encoded using a custom
968925 // / bytecode format.
969926 template <typename T>
970927 LogicalResult parseCustomEntry (Entry<T> &entry, EncodingReader &reader,
971- StringRef entryType, uint64_t index,
972- uint64_t depth);
973-
974- // / Parse an entry using the given reader that was encoded using the textual
975- // / assembly format.
976- template <typename T>
977- LogicalResult parseAsmEntry (T &result, EncodingReader &reader,
978- StringRef entryType);
928+ StringRef entryType);
979929
980930 // / The string section reader used to resolve string references when parsing
981931 // / custom encoded attribute/type entries.
@@ -1001,10 +951,6 @@ class AttrTypeReader {
1001951
1002952 // / Reference to the parser configuration.
1003953 const ParserConfig &parserConfig;
1004-
1005- // / Worklist for deferred attribute/type parsing. This is used to handle
1006- // / deeply nested structures like CallSiteLoc iteratively.
1007- std::vector<uint64_t > deferredWorklist;
1008954};
1009955
1010956class DialectReader : public DialectBytecodeReader {
@@ -1013,11 +959,10 @@ class DialectReader : public DialectBytecodeReader {
1013959 const StringSectionReader &stringReader,
1014960 const ResourceSectionReader &resourceReader,
1015961 const llvm::StringMap<BytecodeDialect *> &dialectsMap,
1016- EncodingReader &reader, uint64_t &bytecodeVersion,
1017- uint64_t depth = 0 )
962+ EncodingReader &reader, uint64_t &bytecodeVersion)
1018963 : attrTypeReader(attrTypeReader), stringReader(stringReader),
1019964 resourceReader (resourceReader), dialectsMap(dialectsMap),
1020- reader(reader), bytecodeVersion(bytecodeVersion), depth(depth) {}
965+ reader(reader), bytecodeVersion(bytecodeVersion) {}
1021966
1022967 InFlightDiagnostic emitError (const Twine &msg) const override {
1023968 return reader.emitError (msg);
@@ -1053,40 +998,14 @@ class DialectReader : public DialectBytecodeReader {
1053998 // IR
1054999 // ===--------------------------------------------------------------------===//
10551000
1056- // / The maximum depth to eagerly parse nested attributes/types before
1057- // / deferring.
1058- static constexpr uint64_t maxAttrTypeDepth = 5 ;
1059-
10601001 LogicalResult readAttribute (Attribute &result) override {
1061- uint64_t index;
1062- if (failed (reader.parseVarInt (index)))
1063- return failure ();
1064- if (depth > maxAttrTypeDepth) {
1065- if (Attribute attr = attrTypeReader.getAttributeOrSentinel (index)) {
1066- result = attr;
1067- return success ();
1068- }
1069- attrTypeReader.addDeferredParsing (index);
1070- return failure ();
1071- }
1072- return attrTypeReader.readAttribute (index, result, depth + 1 );
1002+ return attrTypeReader.parseAttribute (reader, result);
10731003 }
10741004 LogicalResult readOptionalAttribute (Attribute &result) override {
10751005 return attrTypeReader.parseOptionalAttribute (reader, result);
10761006 }
10771007 LogicalResult readType (Type &result) override {
1078- uint64_t index;
1079- if (failed (reader.parseVarInt (index)))
1080- return failure ();
1081- if (depth > maxAttrTypeDepth) {
1082- if (Type type = attrTypeReader.getTypeOrSentinel (index)) {
1083- result = type;
1084- return success ();
1085- }
1086- attrTypeReader.addDeferredParsing (index);
1087- return failure ();
1088- }
1089- return attrTypeReader.readType (index, result, depth + 1 );
1008+ return attrTypeReader.parseType (reader, result);
10901009 }
10911010
10921011 FailureOr<AsmDialectResourceHandle> readResourceHandle () override {
@@ -1176,7 +1095,6 @@ class DialectReader : public DialectBytecodeReader {
11761095 const llvm::StringMap<BytecodeDialect *> &dialectsMap;
11771096 EncodingReader &reader;
11781097 uint64_t &bytecodeVersion;
1179- uint64_t depth;
11801098};
11811099
11821100// / Wraps the properties section and handles reading properties out of it.
@@ -1320,112 +1238,69 @@ LogicalResult AttrTypeReader::initialize(
13201238}
13211239
13221240template <typename T>
1323- T AttrTypeReader::resolveEntry (SmallVectorImpl<Entry<T>> &entries,
1324- uint64_t index, StringRef entryType,
1325- uint64_t depth) {
1241+ T AttrTypeReader::resolveEntry (SmallVectorImpl<Entry<T>> &entries, size_t index,
1242+ StringRef entryType) {
13261243 if (index >= entries.size ()) {
13271244 emitError (fileLoc) << " invalid " << entryType << " index: " << index;
13281245 return {};
13291246 }
13301247
1331- // Fast path: Try direct parsing without worklist overhead. This handles the
1332- // common case where there are no deferred dependencies.
1333- assert (deferredWorklist.empty ());
1334- T result;
1335- if (succeeded (readEntry (entries, index, result, entryType, depth))) {
1336- assert (deferredWorklist.empty ());
1337- return result;
1338- }
1339- if (deferredWorklist.empty ()) {
1340- // Failed with no deferred entries is error.
1341- return T ();
1342- }
1343-
1344- // Slow path: Use worklist to handle deferred dependencies. Use a deque to
1345- // iteratively resolve entries with dependencies.
1346- // - Pop from front to process
1347- // - Push new dependencies to front (depth-first)
1348- // - Move failed entries to back (retry after dependencies)
1349- std::deque<size_t > worklist;
1350- llvm::DenseSet<size_t > inWorklist;
1351-
1352- // Add the original index and any dependencies from the fast path attempt.
1353- worklist.push_back (index);
1354- inWorklist.insert (index);
1355- for (uint64_t idx : llvm::reverse (deferredWorklist)) {
1356- if (inWorklist.insert (idx).second )
1357- worklist.push_front (idx);
1358- }
1359-
1360- while (!worklist.empty ()) {
1361- size_t currentIndex = worklist.front ();
1362- worklist.pop_front ();
1363-
1364- // Clear the deferred worklist before parsing to capture any new entries.
1365- deferredWorklist.clear ();
1248+ // If the entry has already been resolved, there is nothing left to do.
1249+ Entry<T> &entry = entries[index];
1250+ if (entry.entry )
1251+ return entry.entry ;
13661252
1367- T result;
1368- if (succeeded (readEntry (entries, currentIndex, result, entryType, depth))) {
1369- inWorklist.erase (currentIndex);
1370- continue ;
1371- }
1253+ // Parse the entry.
1254+ EncodingReader reader (entry.data , fileLoc);
13721255
1373- if (deferredWorklist.empty ()) {
1374- // Parsing failed with no deferred entries which implies an error.
1256+ // Parse based on how the entry was encoded.
1257+ if (entry.hasCustomEncoding ) {
1258+ if (failed (parseCustomEntry (entry, reader, entryType)))
13751259 return T ();
1376- }
1377-
1378- // Move this entry to the back to retry after dependencies.
1379- worklist.push_back (currentIndex);
1260+ } else if (failed (parseAsmEntry (entry.entry , reader, entryType))) {
1261+ return T ();
1262+ }
13801263
1381- // Add dependencies to the front (in reverse so they maintain order).
1382- for (uint64_t idx : llvm::reverse (deferredWorklist)) {
1383- if (inWorklist.insert (idx).second )
1384- worklist.push_front (idx);
1385- }
1386- deferredWorklist.clear ();
1264+ if (!reader.empty ()) {
1265+ reader.emitError (" unexpected trailing bytes after " + entryType + " entry" );
1266+ return T ();
13871267 }
1388- return entries[index] .entry ;
1268+ return entry .entry ;
13891269}
13901270
13911271template <typename T>
1392- LogicalResult AttrTypeReader::readEntry (SmallVectorImpl<Entry<T>> &entries,
1393- uint64_t index, T &result,
1394- StringRef entryType, uint64_t depth) {
1395- if (index >= entries.size ())
1396- return emitError (fileLoc) << " invalid " << entryType << " index: " << index;
1397-
1398- // If the entry has already been resolved, return it.
1399- Entry<T> &entry = entries[index];
1400- if (entry.entry ) {
1401- result = entry.entry ;
1402- return success ();
1403- }
1404-
1405- // If the entry hasn't been resolved, try to parse it.
1406- EncodingReader reader (entry.data , fileLoc);
1407- LogicalResult parseResult =
1408- entry.hasCustomEncoding
1409- ? parseCustomEntry (entry, reader, entryType, index, depth)
1410- : parseAsmEntry (entry.entry , reader, entryType);
1411- if (failed (parseResult))
1272+ LogicalResult AttrTypeReader::parseAsmEntry (T &result, EncodingReader &reader,
1273+ StringRef entryType) {
1274+ StringRef asmStr;
1275+ if (failed (reader.parseNullTerminatedString (asmStr)))
14121276 return failure ();
14131277
1414- if (!reader.empty ())
1415- return reader.emitError (" unexpected trailing bytes after " + entryType +
1416- " entry" );
1278+ // Invoke the MLIR assembly parser to parse the entry text.
1279+ size_t numRead = 0 ;
1280+ MLIRContext *context = fileLoc->getContext ();
1281+ if constexpr (std::is_same_v<T, Type>)
1282+ result =
1283+ ::parseType (asmStr, context, &numRead, /* isKnownNullTerminated=*/ true );
1284+ else
1285+ result = ::parseAttribute (asmStr, context, Type (), &numRead,
1286+ /* isKnownNullTerminated=*/ true );
1287+ if (!result)
1288+ return failure ();
14171289
1418- result = entry.entry ;
1290+ // Ensure there weren't dangling characters after the entry.
1291+ if (numRead != asmStr.size ()) {
1292+ return reader.emitError (" trailing characters found after " , entryType,
1293+ " assembly format: " , asmStr.drop_front (numRead));
1294+ }
14191295 return success ();
14201296}
14211297
14221298template <typename T>
14231299LogicalResult AttrTypeReader::parseCustomEntry (Entry<T> &entry,
14241300 EncodingReader &reader,
1425- StringRef entryType,
1426- uint64_t index, uint64_t depth) {
1301+ StringRef entryType) {
14271302 DialectReader dialectReader (*this , stringReader, resourceReader, dialectsMap,
1428- reader, bytecodeVersion, depth );
1303+ reader, bytecodeVersion);
14291304 if (failed (entry.dialect ->load (dialectReader, fileLoc.getContext ())))
14301305 return failure ();
14311306
@@ -1475,33 +1350,6 @@ LogicalResult AttrTypeReader::parseCustomEntry(Entry<T> &entry,
14751350 return success (!!entry.entry );
14761351}
14771352
1478- template <typename T>
1479- LogicalResult AttrTypeReader::parseAsmEntry (T &result, EncodingReader &reader,
1480- StringRef entryType) {
1481- StringRef asmStr;
1482- if (failed (reader.parseNullTerminatedString (asmStr)))
1483- return failure ();
1484-
1485- // Invoke the MLIR assembly parser to parse the entry text.
1486- size_t numRead = 0 ;
1487- MLIRContext *context = fileLoc->getContext ();
1488- if constexpr (std::is_same_v<T, Type>)
1489- result =
1490- ::parseType (asmStr, context, &numRead, /* isKnownNullTerminated=*/ true );
1491- else
1492- result = ::parseAttribute (asmStr, context, Type (), &numRead,
1493- /* isKnownNullTerminated=*/ true );
1494- if (!result)
1495- return failure ();
1496-
1497- // Ensure there weren't dangling characters after the entry.
1498- if (numRead != asmStr.size ()) {
1499- return reader.emitError (" trailing characters found after " , entryType,
1500- " assembly format: " , asmStr.drop_front (numRead));
1501- }
1502- return success ();
1503- }
1504-
15051353// ===----------------------------------------------------------------------===//
15061354// Bytecode Reader
15071355// ===----------------------------------------------------------------------===//
0 commit comments