Skip to content

Commit a4baf2c

Browse files
committed
Revert "[mlir][bytecode] Add support for deferred attribute/type parsing. (#170993)"
This reverts commit 93d2ef1. A regression was found. See: #170993 (comment)
1 parent 399b330 commit a4baf2c

File tree

2 files changed

+54
-243
lines changed

2 files changed

+54
-243
lines changed

mlir/lib/Bytecode/Reader/BytecodeReader.cpp

Lines changed: 54 additions & 206 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727

2828
#include <cstddef>
2929
#include <cstdint>
30-
#include <deque>
3130
#include <list>
3231
#include <memory>
3332
#include <numeric>
@@ -831,23 +830,6 @@ namespace {
831830
/// This class provides support for reading attribute and type entries from the
832831
/// bytecode. Attribute and Type entries are read lazily on demand, so we use
833832
/// this reader to manage when to actually parse them from the bytecode.
834-
///
835-
/// The parsing of attributes & types are generally recursive, this can lead to
836-
/// stack overflows for deeply nested structures, so we track a few extra pieces
837-
/// of information to avoid this:
838-
///
839-
/// - `depth`: The current depth while parsing nested attributes. We defer on
840-
/// parsing deeply nested attributes to avoid potential stack overflows. The
841-
/// deferred parsing is achieved by reporting a failure when parsing a nested
842-
/// attribute/type and registering the index of the encountered attribute/type
843-
/// in the deferred parsing worklist. Hence, a failure with deffered entry
844-
/// does not constitute a failure, it also requires that folks return on
845-
/// first failure rather than attempting additional parses.
846-
/// - `deferredWorklist`: A list of attribute/type indices that we could not
847-
/// parse due to hitting the depth limit. The worklist is used to capture the
848-
/// indices of attributes/types that need to be parsed/reparsed when we hit
849-
/// the depth limit. This enables moving the tracking of what needs to be
850-
/// parsed to the heap.
851833
class AttrTypeReader {
852834
/// This class represents a single attribute or type entry.
853835
template <typename T>
@@ -881,34 +863,12 @@ class AttrTypeReader {
881863
ArrayRef<uint8_t> sectionData,
882864
ArrayRef<uint8_t> offsetSectionData);
883865

884-
LogicalResult readAttribute(uint64_t index, Attribute &result,
885-
uint64_t depth = 0) {
886-
return readEntry(attributes, index, result, "attribute", depth);
887-
}
888-
889-
LogicalResult readType(uint64_t index, Type &result, uint64_t depth = 0) {
890-
return readEntry(types, index, result, "type", depth);
891-
}
892-
893866
/// Resolve the attribute or type at the given index. Returns nullptr on
894867
/// failure.
895-
Attribute resolveAttribute(size_t index, uint64_t depth = 0) {
896-
return resolveEntry(attributes, index, "Attribute", depth);
897-
}
898-
Type resolveType(size_t index, uint64_t depth = 0) {
899-
return resolveEntry(types, index, "Type", depth);
900-
}
901-
902-
Attribute getAttributeOrSentinel(size_t index) {
903-
if (index >= attributes.size())
904-
return nullptr;
905-
return attributes[index].entry;
906-
}
907-
Type getTypeOrSentinel(size_t index) {
908-
if (index >= types.size())
909-
return nullptr;
910-
return types[index].entry;
868+
Attribute resolveAttribute(size_t index) {
869+
return resolveEntry(attributes, index, "Attribute");
911870
}
871+
Type resolveType(size_t index) { return resolveEntry(types, index, "Type"); }
912872

913873
/// Parse a reference to an attribute or type using the given reader.
914874
LogicalResult parseAttribute(EncodingReader &reader, Attribute &result) {
@@ -949,33 +909,23 @@ class AttrTypeReader {
949909
llvm::getTypeName<T>(), ", but got: ", baseResult);
950910
}
951911

952-
/// Add an index to the deferred worklist for re-parsing.
953-
void addDeferredParsing(uint64_t index) { deferredWorklist.push_back(index); }
954-
955912
private:
956913
/// Resolve the given entry at `index`.
957914
template <typename T>
958-
T resolveEntry(SmallVectorImpl<Entry<T>> &entries, uint64_t index,
959-
StringRef entryType, uint64_t depth = 0);
915+
T resolveEntry(SmallVectorImpl<Entry<T>> &entries, size_t index,
916+
StringRef entryType);
960917

961-
/// Read the entry at the given index, returning failure if the entry is not
962-
/// yet resolved.
918+
/// Parse an entry using the given reader that was encoded using the textual
919+
/// assembly format.
963920
template <typename T>
964-
LogicalResult readEntry(SmallVectorImpl<Entry<T>> &entries, uint64_t index,
965-
T &result, StringRef entryType, uint64_t depth);
921+
LogicalResult parseAsmEntry(T &result, EncodingReader &reader,
922+
StringRef entryType);
966923

967924
/// Parse an entry using the given reader that was encoded using a custom
968925
/// bytecode format.
969926
template <typename T>
970927
LogicalResult parseCustomEntry(Entry<T> &entry, EncodingReader &reader,
971-
StringRef entryType, uint64_t index,
972-
uint64_t depth);
973-
974-
/// Parse an entry using the given reader that was encoded using the textual
975-
/// assembly format.
976-
template <typename T>
977-
LogicalResult parseAsmEntry(T &result, EncodingReader &reader,
978-
StringRef entryType);
928+
StringRef entryType);
979929

980930
/// The string section reader used to resolve string references when parsing
981931
/// custom encoded attribute/type entries.
@@ -1001,10 +951,6 @@ class AttrTypeReader {
1001951

1002952
/// Reference to the parser configuration.
1003953
const ParserConfig &parserConfig;
1004-
1005-
/// Worklist for deferred attribute/type parsing. This is used to handle
1006-
/// deeply nested structures like CallSiteLoc iteratively.
1007-
std::vector<uint64_t> deferredWorklist;
1008954
};
1009955

1010956
class DialectReader : public DialectBytecodeReader {
@@ -1013,11 +959,10 @@ class DialectReader : public DialectBytecodeReader {
1013959
const StringSectionReader &stringReader,
1014960
const ResourceSectionReader &resourceReader,
1015961
const llvm::StringMap<BytecodeDialect *> &dialectsMap,
1016-
EncodingReader &reader, uint64_t &bytecodeVersion,
1017-
uint64_t depth = 0)
962+
EncodingReader &reader, uint64_t &bytecodeVersion)
1018963
: attrTypeReader(attrTypeReader), stringReader(stringReader),
1019964
resourceReader(resourceReader), dialectsMap(dialectsMap),
1020-
reader(reader), bytecodeVersion(bytecodeVersion), depth(depth) {}
965+
reader(reader), bytecodeVersion(bytecodeVersion) {}
1021966

1022967
InFlightDiagnostic emitError(const Twine &msg) const override {
1023968
return reader.emitError(msg);
@@ -1053,40 +998,14 @@ class DialectReader : public DialectBytecodeReader {
1053998
// IR
1054999
//===--------------------------------------------------------------------===//
10551000

1056-
/// The maximum depth to eagerly parse nested attributes/types before
1057-
/// deferring.
1058-
static constexpr uint64_t maxAttrTypeDepth = 5;
1059-
10601001
LogicalResult readAttribute(Attribute &result) override {
1061-
uint64_t index;
1062-
if (failed(reader.parseVarInt(index)))
1063-
return failure();
1064-
if (depth > maxAttrTypeDepth) {
1065-
if (Attribute attr = attrTypeReader.getAttributeOrSentinel(index)) {
1066-
result = attr;
1067-
return success();
1068-
}
1069-
attrTypeReader.addDeferredParsing(index);
1070-
return failure();
1071-
}
1072-
return attrTypeReader.readAttribute(index, result, depth + 1);
1002+
return attrTypeReader.parseAttribute(reader, result);
10731003
}
10741004
LogicalResult readOptionalAttribute(Attribute &result) override {
10751005
return attrTypeReader.parseOptionalAttribute(reader, result);
10761006
}
10771007
LogicalResult readType(Type &result) override {
1078-
uint64_t index;
1079-
if (failed(reader.parseVarInt(index)))
1080-
return failure();
1081-
if (depth > maxAttrTypeDepth) {
1082-
if (Type type = attrTypeReader.getTypeOrSentinel(index)) {
1083-
result = type;
1084-
return success();
1085-
}
1086-
attrTypeReader.addDeferredParsing(index);
1087-
return failure();
1088-
}
1089-
return attrTypeReader.readType(index, result, depth + 1);
1008+
return attrTypeReader.parseType(reader, result);
10901009
}
10911010

10921011
FailureOr<AsmDialectResourceHandle> readResourceHandle() override {
@@ -1176,7 +1095,6 @@ class DialectReader : public DialectBytecodeReader {
11761095
const llvm::StringMap<BytecodeDialect *> &dialectsMap;
11771096
EncodingReader &reader;
11781097
uint64_t &bytecodeVersion;
1179-
uint64_t depth;
11801098
};
11811099

11821100
/// Wraps the properties section and handles reading properties out of it.
@@ -1320,112 +1238,69 @@ LogicalResult AttrTypeReader::initialize(
13201238
}
13211239

13221240
template <typename T>
1323-
T AttrTypeReader::resolveEntry(SmallVectorImpl<Entry<T>> &entries,
1324-
uint64_t index, StringRef entryType,
1325-
uint64_t depth) {
1241+
T AttrTypeReader::resolveEntry(SmallVectorImpl<Entry<T>> &entries, size_t index,
1242+
StringRef entryType) {
13261243
if (index >= entries.size()) {
13271244
emitError(fileLoc) << "invalid " << entryType << " index: " << index;
13281245
return {};
13291246
}
13301247

1331-
// Fast path: Try direct parsing without worklist overhead. This handles the
1332-
// common case where there are no deferred dependencies.
1333-
assert(deferredWorklist.empty());
1334-
T result;
1335-
if (succeeded(readEntry(entries, index, result, entryType, depth))) {
1336-
assert(deferredWorklist.empty());
1337-
return result;
1338-
}
1339-
if (deferredWorklist.empty()) {
1340-
// Failed with no deferred entries is error.
1341-
return T();
1342-
}
1343-
1344-
// Slow path: Use worklist to handle deferred dependencies. Use a deque to
1345-
// iteratively resolve entries with dependencies.
1346-
// - Pop from front to process
1347-
// - Push new dependencies to front (depth-first)
1348-
// - Move failed entries to back (retry after dependencies)
1349-
std::deque<size_t> worklist;
1350-
llvm::DenseSet<size_t> inWorklist;
1351-
1352-
// Add the original index and any dependencies from the fast path attempt.
1353-
worklist.push_back(index);
1354-
inWorklist.insert(index);
1355-
for (uint64_t idx : llvm::reverse(deferredWorklist)) {
1356-
if (inWorklist.insert(idx).second)
1357-
worklist.push_front(idx);
1358-
}
1359-
1360-
while (!worklist.empty()) {
1361-
size_t currentIndex = worklist.front();
1362-
worklist.pop_front();
1363-
1364-
// Clear the deferred worklist before parsing to capture any new entries.
1365-
deferredWorklist.clear();
1248+
// If the entry has already been resolved, there is nothing left to do.
1249+
Entry<T> &entry = entries[index];
1250+
if (entry.entry)
1251+
return entry.entry;
13661252

1367-
T result;
1368-
if (succeeded(readEntry(entries, currentIndex, result, entryType, depth))) {
1369-
inWorklist.erase(currentIndex);
1370-
continue;
1371-
}
1253+
// Parse the entry.
1254+
EncodingReader reader(entry.data, fileLoc);
13721255

1373-
if (deferredWorklist.empty()) {
1374-
// Parsing failed with no deferred entries which implies an error.
1256+
// Parse based on how the entry was encoded.
1257+
if (entry.hasCustomEncoding) {
1258+
if (failed(parseCustomEntry(entry, reader, entryType)))
13751259
return T();
1376-
}
1377-
1378-
// Move this entry to the back to retry after dependencies.
1379-
worklist.push_back(currentIndex);
1260+
} else if (failed(parseAsmEntry(entry.entry, reader, entryType))) {
1261+
return T();
1262+
}
13801263

1381-
// Add dependencies to the front (in reverse so they maintain order).
1382-
for (uint64_t idx : llvm::reverse(deferredWorklist)) {
1383-
if (inWorklist.insert(idx).second)
1384-
worklist.push_front(idx);
1385-
}
1386-
deferredWorklist.clear();
1264+
if (!reader.empty()) {
1265+
reader.emitError("unexpected trailing bytes after " + entryType + " entry");
1266+
return T();
13871267
}
1388-
return entries[index].entry;
1268+
return entry.entry;
13891269
}
13901270

13911271
template <typename T>
1392-
LogicalResult AttrTypeReader::readEntry(SmallVectorImpl<Entry<T>> &entries,
1393-
uint64_t index, T &result,
1394-
StringRef entryType, uint64_t depth) {
1395-
if (index >= entries.size())
1396-
return emitError(fileLoc) << "invalid " << entryType << " index: " << index;
1397-
1398-
// If the entry has already been resolved, return it.
1399-
Entry<T> &entry = entries[index];
1400-
if (entry.entry) {
1401-
result = entry.entry;
1402-
return success();
1403-
}
1404-
1405-
// If the entry hasn't been resolved, try to parse it.
1406-
EncodingReader reader(entry.data, fileLoc);
1407-
LogicalResult parseResult =
1408-
entry.hasCustomEncoding
1409-
? parseCustomEntry(entry, reader, entryType, index, depth)
1410-
: parseAsmEntry(entry.entry, reader, entryType);
1411-
if (failed(parseResult))
1272+
LogicalResult AttrTypeReader::parseAsmEntry(T &result, EncodingReader &reader,
1273+
StringRef entryType) {
1274+
StringRef asmStr;
1275+
if (failed(reader.parseNullTerminatedString(asmStr)))
14121276
return failure();
14131277

1414-
if (!reader.empty())
1415-
return reader.emitError("unexpected trailing bytes after " + entryType +
1416-
" entry");
1278+
// Invoke the MLIR assembly parser to parse the entry text.
1279+
size_t numRead = 0;
1280+
MLIRContext *context = fileLoc->getContext();
1281+
if constexpr (std::is_same_v<T, Type>)
1282+
result =
1283+
::parseType(asmStr, context, &numRead, /*isKnownNullTerminated=*/true);
1284+
else
1285+
result = ::parseAttribute(asmStr, context, Type(), &numRead,
1286+
/*isKnownNullTerminated=*/true);
1287+
if (!result)
1288+
return failure();
14171289

1418-
result = entry.entry;
1290+
// Ensure there weren't dangling characters after the entry.
1291+
if (numRead != asmStr.size()) {
1292+
return reader.emitError("trailing characters found after ", entryType,
1293+
" assembly format: ", asmStr.drop_front(numRead));
1294+
}
14191295
return success();
14201296
}
14211297

14221298
template <typename T>
14231299
LogicalResult AttrTypeReader::parseCustomEntry(Entry<T> &entry,
14241300
EncodingReader &reader,
1425-
StringRef entryType,
1426-
uint64_t index, uint64_t depth) {
1301+
StringRef entryType) {
14271302
DialectReader dialectReader(*this, stringReader, resourceReader, dialectsMap,
1428-
reader, bytecodeVersion, depth);
1303+
reader, bytecodeVersion);
14291304
if (failed(entry.dialect->load(dialectReader, fileLoc.getContext())))
14301305
return failure();
14311306

@@ -1475,33 +1350,6 @@ LogicalResult AttrTypeReader::parseCustomEntry(Entry<T> &entry,
14751350
return success(!!entry.entry);
14761351
}
14771352

1478-
template <typename T>
1479-
LogicalResult AttrTypeReader::parseAsmEntry(T &result, EncodingReader &reader,
1480-
StringRef entryType) {
1481-
StringRef asmStr;
1482-
if (failed(reader.parseNullTerminatedString(asmStr)))
1483-
return failure();
1484-
1485-
// Invoke the MLIR assembly parser to parse the entry text.
1486-
size_t numRead = 0;
1487-
MLIRContext *context = fileLoc->getContext();
1488-
if constexpr (std::is_same_v<T, Type>)
1489-
result =
1490-
::parseType(asmStr, context, &numRead, /*isKnownNullTerminated=*/true);
1491-
else
1492-
result = ::parseAttribute(asmStr, context, Type(), &numRead,
1493-
/*isKnownNullTerminated=*/true);
1494-
if (!result)
1495-
return failure();
1496-
1497-
// Ensure there weren't dangling characters after the entry.
1498-
if (numRead != asmStr.size()) {
1499-
return reader.emitError("trailing characters found after ", entryType,
1500-
" assembly format: ", asmStr.drop_front(numRead));
1501-
}
1502-
return success();
1503-
}
1504-
15051353
//===----------------------------------------------------------------------===//
15061354
// Bytecode Reader
15071355
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)