diff --git a/bolt/include/bolt/Passes/MarkRAStates.h b/bolt/include/bolt/Passes/MarkRAStates.h
index 675ab9727142b..202f1dda2aad8 100644
--- a/bolt/include/bolt/Passes/MarkRAStates.h
+++ b/bolt/include/bolt/Passes/MarkRAStates.h
@@ -13,11 +13,16 @@
 #define BOLT_PASSES_MARK_RA_STATES
 
 #include "bolt/Passes/BinaryPasses.h"
+#include <mutex>
 
 namespace llvm {
 namespace bolt {
 
 class MarkRAStates : public BinaryFunctionPass {
+  // setIgnored() is not thread-safe, but the pass runs on functions in
+  // parallel.
+  std::mutex IgnoreMutex;
+
 public:
   explicit MarkRAStates() : BinaryFunctionPass(false) {}
 
diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp
index af6a5ca7e31e5..b262d66732b7d 100644
--- a/bolt/lib/Passes/MarkRAStates.cpp
+++ b/bolt/lib/Passes/MarkRAStates.cpp
@@ -43,10 +43,11 @@ bool MarkRAStates::runOnFunction(BinaryFunction &BF) {
         // Not all functions have .cfi_negate_ra_state in them. But if one does,
         // we expect psign/pauth instructions to have the hasNegateRAState
         // annotation.
-        BF.setIgnored();
         BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
                   << BF.getPrintName()
                   << ": ptr sign/auth inst without .cfi_negate_ra_state\n";
+        std::lock_guard<std::mutex> Lock(IgnoreMutex);
+        BF.setIgnored();
         return false;
       }
     }
@@ -67,6 +68,7 @@ bool MarkRAStates::runOnFunction(BinaryFunction &BF) {
           BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
                     << BF.getPrintName()
                     << ": ptr signing inst encountered in Signed RA state\n";
+          std::lock_guard<std::mutex> Lock(IgnoreMutex);
           BF.setIgnored();
           return false;
         }
@@ -80,6 +82,7 @@ bool MarkRAStates::runOnFunction(BinaryFunction &BF) {
                     << BF.getPrintName()
                     << ": ptr authenticating inst encountered in Unsigned RA "
                        "state\n";
+          std::lock_guard<std::mutex> Lock(IgnoreMutex);
          BF.setIgnored();
           return false;
         }
diff --git a/clang-tools-extra/clangd/support/Markup.cpp b/clang-tools-extra/clangd/support/Markup.cpp
index 304917de252bf..9ba993a04709c 100644
--- a/clang-tools-extra/clangd/support/Markup.cpp
+++ b/clang-tools-extra/clangd/support/Markup.cpp
@@ -475,31 +475,61 @@ std::string Block::asPlainText() const {
   return llvm::StringRef(OS.str()).trim().str();
 }
 
+void Paragraph::renderNewlinesMarkdown(llvm::raw_ostream &OS,
+                                       llvm::StringRef ParagraphText) const {
+  llvm::StringRef Line, Rest;
+
+  for (std::tie(Line, Rest) = ParagraphText.ltrim("\n").rtrim().split('\n');
+       !(Line.empty() && Rest.empty());
+       std::tie(Line, Rest) = Rest.split('\n')) {
+
+    if (Line.empty()) {
+      // Blank lines are preserved in markdown.
+      OS << '\n';
+      continue;
+    }
+
+    OS << Line;
+
+    if (!Rest.empty() && isHardLineBreakAfter(Line, Rest, /*IsMarkdown=*/true))
+      // In markdown, 2 spaces before a line break forces a line break.
+      OS << "  ";
+    OS << '\n';
+  }
+}
+
 void Paragraph::renderEscapedMarkdown(llvm::raw_ostream &OS) const {
   bool NeedsSpace = false;
   bool HasChunks = false;
+  std::string ParagraphText;
+  ParagraphText.reserve(EstimatedStringSize);
+  llvm::raw_string_ostream ParagraphTextOS(ParagraphText);
 
   for (auto &C : Chunks) {
     if (C.SpaceBefore || NeedsSpace)
-      OS << " ";
+      ParagraphTextOS << " ";
     switch (C.Kind) {
     case ChunkKind::PlainText:
-      OS << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/true);
+      ParagraphTextOS << renderText(C.Contents, !HasChunks,
+                                    /*EscapeMarkdown=*/true);
       break;
     case ChunkKind::InlineCode:
-      OS << renderInlineBlock(C.Contents);
+      ParagraphTextOS << renderInlineBlock(C.Contents);
       break;
     case ChunkKind::Bold:
-      OS << renderText("**" + C.Contents + "**", !HasChunks,
-                       /*EscapeMarkdown=*/true);
+      ParagraphTextOS << renderText("**" + C.Contents + "**", !HasChunks,
+                                    /*EscapeMarkdown=*/true);
       break;
     case ChunkKind::Emphasized:
-      OS << renderText("*" + C.Contents + "*", !HasChunks,
-                       /*EscapeMarkdown=*/true);
+      ParagraphTextOS << renderText("*" + C.Contents + "*", !HasChunks,
+                                    /*EscapeMarkdown=*/true);
       break;
     }
     HasChunks = true;
     NeedsSpace = C.SpaceAfter;
   }
+
+  renderNewlinesMarkdown(OS, ParagraphText);
+
   // A paragraph in markdown is separated by a blank line.
   OS << "\n\n";
 }
@@ -507,28 +537,39 @@ void Paragraph::renderEscapedMarkdown(llvm::raw_ostream &OS) const {
 void Paragraph::renderMarkdown(llvm::raw_ostream &OS) const {
   bool NeedsSpace = false;
   bool HasChunks = false;
+  std::string ParagraphText;
+  ParagraphText.reserve(EstimatedStringSize);
+  llvm::raw_string_ostream ParagraphTextOS(ParagraphText);
 
   for (auto &C : Chunks) {
     if (C.SpaceBefore || NeedsSpace)
-      OS << " ";
+      ParagraphTextOS << " ";
     switch (C.Kind) {
     case ChunkKind::PlainText:
-      OS << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false);
+      ParagraphTextOS << renderText(C.Contents, !HasChunks,
+                                    /*EscapeMarkdown=*/false);
       break;
     case ChunkKind::InlineCode:
-      OS << renderInlineBlock(C.Contents);
+      ParagraphTextOS << renderInlineBlock(C.Contents);
       break;
     case ChunkKind::Bold:
-      OS << "**" << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false)
-         << "**";
+      ParagraphTextOS << "**"
+                      << renderText(C.Contents, !HasChunks,
+                                    /*EscapeMarkdown=*/false)
+                      << "**";
       break;
     case ChunkKind::Emphasized:
-      OS << "*" << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false)
-         << "*";
+      ParagraphTextOS << "*"
+                      << renderText(C.Contents, !HasChunks,
+                                    /*EscapeMarkdown=*/false)
+                      << "*";
       break;
     }
     HasChunks = true;
     NeedsSpace = C.SpaceAfter;
   }
+
+  renderNewlinesMarkdown(OS, ParagraphText);
+
   // A paragraph in markdown is separated by a blank line.
   OS << "\n\n";
 }
@@ -537,8 +578,6 @@ std::unique_ptr<Block> Paragraph::clone() const {
   return std::make_unique<Paragraph>(*this);
 }
 
-/// Choose a marker to delimit `Text` from a prioritized list of options.
-/// This is more readable than escaping for plain-text.
 llvm::StringRef Paragraph::chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
                                         llvm::StringRef Text) const {
   // Prefer a delimiter whose characters don't appear in the text.
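For context between the two hunks above: the new renderNewlinesMarkdown helper relies on the CommonMark rule that two trailing spaces before a newline force a hard line break. A minimal standalone sketch of that rule follows (not part of the patch; names are illustrative, and unlike the patch, which applies the break only when isHardLineBreakAfter fires, this sketch applies it to every non-blank line):

    #include <string>
    #include <vector>

    // Join paragraph lines, appending two spaces before each newline so a
    // CommonMark renderer emits a hard line break; blank lines pass through
    // unchanged and keep the paragraph-internal blank line.
    std::string joinWithHardBreaks(const std::vector<std::string> &Lines) {
      std::string Out;
      for (size_t I = 0; I < Lines.size(); ++I) {
        Out += Lines[I];
        if (!Lines[I].empty() && I + 1 != Lines.size())
          Out += "  "; // two trailing spaces == markdown hard break
        Out += '\n';
      }
      return Out;
    }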
@@ -548,23 +587,36 @@ llvm::StringRef Paragraph::chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
   return Options.front();
 }
 
-bool Paragraph::punctuationIndicatesLineBreak(llvm::StringRef Line) const {
+bool Paragraph::punctuationIndicatesLineBreak(llvm::StringRef Line,
+                                              bool IsMarkdown) const {
   constexpr llvm::StringLiteral Punctuation = R"txt(.:,;!?)txt";
 
+  if (!IsMarkdown && Line.ends_with("  "))
+    return true;
+
   Line = Line.rtrim();
   return !Line.empty() && Punctuation.contains(Line.back());
 }
 
-bool Paragraph::isHardLineBreakIndicator(llvm::StringRef Rest) const {
+bool Paragraph::isHardLineBreakIndicator(llvm::StringRef Rest,
+                                         bool IsMarkdown) const {
+  // Plaintext indicators:
   // '-'/'*' md list, '@'/'\' documentation command, '>' md blockquote,
-  // '#' headings, '`' code blocks, two spaces (markdown force newline)
-  constexpr llvm::StringLiteral LinebreakIndicators = R"txt(-*@\>#`)txt";
+  // '#' headings, '`' code blocks
+  constexpr llvm::StringLiteral LinebreakIndicatorsPlainText =
+      R"txt(-*@\>#`)txt";
+  // Markdown indicators:
+  // Only '@' and '\' documentation commands/escaped markdown syntax.
+  constexpr llvm::StringLiteral LinebreakIndicatorsMarkdown = R"txt(@\)txt";
 
   Rest = Rest.ltrim(" \t");
   if (Rest.empty())
     return false;
 
-  if (LinebreakIndicators.contains(Rest.front()))
+  if (IsMarkdown)
+    return LinebreakIndicatorsMarkdown.contains(Rest.front());
+
+  if (LinebreakIndicatorsPlainText.contains(Rest.front()))
     return true;
 
   if (llvm::isDigit(Rest.front())) {
@@ -575,64 +627,18 @@
   return false;
 }
 
-bool Paragraph::isHardLineBreakAfter(llvm::StringRef Line,
-                                     llvm::StringRef Rest) const {
-  // In Markdown, 2 spaces before a line break forces a line break.
-  // Add a line break for plaintext in this case too.
+bool Paragraph::isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest,
+                                     bool IsMarkdown) const {
   // Should we also consider whether Line is short?
-  return Line.ends_with("  ") || punctuationIndicatesLineBreak(Line) ||
-         isHardLineBreakIndicator(Rest);
+  return punctuationIndicatesLineBreak(Line, IsMarkdown) ||
+         isHardLineBreakIndicator(Rest, IsMarkdown);
 }
 
-void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
-  bool NeedsSpace = false;
-  std::string ConcatenatedText;
-  ConcatenatedText.reserve(EstimatedStringSize);
-
-  llvm::raw_string_ostream ConcatenatedOS(ConcatenatedText);
-
-  for (auto &C : Chunks) {
-
-    if (C.Kind == ChunkKind::PlainText) {
-      if (C.SpaceBefore || NeedsSpace)
-        ConcatenatedOS << ' ';
-
-      ConcatenatedOS << C.Contents;
-      NeedsSpace = llvm::isSpace(C.Contents.back()) || C.SpaceAfter;
-      continue;
-    }
-
-    if (C.SpaceBefore || NeedsSpace)
-      ConcatenatedOS << ' ';
-    llvm::StringRef Marker = "";
-    if (C.Preserve && C.Kind == ChunkKind::InlineCode)
-      Marker = chooseMarker({"`", "'", "\""}, C.Contents);
-    else if (C.Kind == ChunkKind::Bold)
-      Marker = "**";
-    else if (C.Kind == ChunkKind::Emphasized)
-      Marker = "*";
-    ConcatenatedOS << Marker << C.Contents << Marker;
-    NeedsSpace = C.SpaceAfter;
-  }
-
-  // We go through the contents line by line to handle the newlines
-  // and required spacing correctly.
-  //
-  // Newlines are added if:
-  // - the line ends with 2 spaces and a newline follows
-  // - the line ends with punctuation that indicates a line break (.:,;!?)
-  // - the next line starts with a hard line break indicator (-@>#`, or a digit
-  //   followed by '.' or ')'), ignoring leading whitespace.
-  //
-  // Otherwise, newlines in the input are replaced with a single space.
-  //
-  // Multiple spaces are collapsed into a single space.
-  //
-  // Lines containing only whitespace are ignored.
+void Paragraph::renderNewlinesPlaintext(llvm::raw_ostream &OS,
+                                        llvm::StringRef ParagraphText) const {
   llvm::StringRef Line, Rest;
 
-  for (std::tie(Line, Rest) =
-           llvm::StringRef(ConcatenatedText).trim().split('\n');
+  for (std::tie(Line, Rest) = ParagraphText.trim().split('\n');
        !(Line.empty() && Rest.empty());
        std::tie(Line, Rest) = Rest.split('\n')) {
 
@@ -653,7 +659,7 @@ void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
 
     OS << canonicalizeSpaces(Line);
 
-    if (isHardLineBreakAfter(Line, Rest))
+    if (isHardLineBreakAfter(Line, Rest, /*IsMarkdown=*/false))
       OS << '\n';
     else if (!Rest.empty())
       // Since we removed any trailing whitespace from the input using trim(),
@@ -661,6 +667,40 @@ void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
       // we know that Rest starts right after a newline.
       // Therefore, we can add a space without worrying about trailing spaces.
       OS << ' ';
   }
+}
+
+void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
+  bool NeedsSpace = false;
+  std::string ParagraphText;
+  ParagraphText.reserve(EstimatedStringSize);
+
+  llvm::raw_string_ostream ParagraphTextOS(ParagraphText);
+
+  for (auto &C : Chunks) {
+
+    if (C.Kind == ChunkKind::PlainText) {
+      if (C.SpaceBefore || NeedsSpace)
+        ParagraphTextOS << ' ';
+
+      ParagraphTextOS << C.Contents;
+      NeedsSpace = llvm::isSpace(C.Contents.back()) || C.SpaceAfter;
+      continue;
+    }
+
+    if (C.SpaceBefore || NeedsSpace)
+      ParagraphTextOS << ' ';
+    llvm::StringRef Marker = "";
+    if (C.Preserve && C.Kind == ChunkKind::InlineCode)
+      Marker = chooseMarker({"`", "'", "\""}, C.Contents);
+    else if (C.Kind == ChunkKind::Bold)
+      Marker = "**";
+    else if (C.Kind == ChunkKind::Emphasized)
+      Marker = "*";
+    ParagraphTextOS << Marker << C.Contents << Marker;
+    NeedsSpace = C.SpaceAfter;
+  }
+
+  renderNewlinesPlaintext(OS, ParagraphText);
 
   // Paragraphs are separated by a blank line.
   OS << "\n\n";
diff --git a/clang-tools-extra/clangd/support/Markup.h b/clang-tools-extra/clangd/support/Markup.h
index eea6328f69a12..219a7dad1e175 100644
--- a/clang-tools-extra/clangd/support/Markup.h
+++ b/clang-tools-extra/clangd/support/Markup.h
@@ -92,9 +92,84 @@ class Paragraph : public Block {
 
   llvm::StringRef chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
                                llvm::StringRef Text) const;
-  bool punctuationIndicatesLineBreak(llvm::StringRef Line) const;
-  bool isHardLineBreakIndicator(llvm::StringRef Rest) const;
-  bool isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest) const;
+
+  /// Checks if the given line ends with punctuation that indicates a line break
+  /// (.:,;!?).
+  ///
+  /// If \p IsMarkdown is false, lines ending with 2 spaces are also considered
+  /// as indicating a line break. This is not needed for markdown because the
+  /// client renderer will handle this case.
+  bool punctuationIndicatesLineBreak(llvm::StringRef Line,
+                                     bool IsMarkdown) const;
+
+  /// Checks if the given line starts with a hard line break indicator.
+  ///
+  /// If \p IsMarkdown is true, only '@' and '\' are considered as indicators.
+  /// Otherwise, '-', '*', '@', '\', '>', '#', '`' and a digit followed by '.'
+  /// or ')' are also considered as indicators.
+  bool isHardLineBreakIndicator(llvm::StringRef Rest, bool IsMarkdown) const;
+
+  /// Checks if a hard line break should be added after the given line.
+  bool isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest,
+                            bool IsMarkdown) const;
+
+  /// \brief Go through the contents line by line to handle the newlines
+  /// and required spacing correctly for markdown rendering.
+  ///
+  /// Newlines are added if:
+  /// - the line ends with punctuation that indicates a line break (.:,;!?)
+  /// - the next line starts with a hard line break indicator \\ (escaped
+  ///   markdown/doxygen command) or @ (doxygen command)
+  ///
+  /// This newline handling is only used when the client requests markdown
+  /// for hover/signature help content.
+  /// Markdown does not add any newlines inside paragraphs unless the user
+  /// explicitly adds them. For hover/signature help content, we still want to
+  /// add newlines in some cases to improve readability, especially when doxygen
+  /// parsing is disabled or not implemented (like for signature help).
+  /// Therefore we add newlines in the above-mentioned cases.
+  ///
+  /// In addition to that, we need to consider that the user can configure
+  /// clangd to treat documentation comments as plain text, while the client
+  /// requests markdown.
+  /// In this case, all markdown syntax is escaped and will
+  /// not be rendered as expected by markdown.
+  /// Examples are lists starting with '-' or headings starting with '#'.
+  /// With the above next line heuristics, these cases are also covered by the
+  /// '\\' new line indicator.
+  ///
+  /// FIXME: The heuristic fails e.g. for lists starting with '*' because it is
+  /// also used for emphasis in markdown and should not be treated as a newline.
+  ///
+  /// \param OS The stream to render to.
+  /// \param ParagraphText The text of the paragraph to render.
+  void renderNewlinesMarkdown(llvm::raw_ostream &OS,
+                              llvm::StringRef ParagraphText) const;
+
+  /// \brief Go through the contents line by line to handle the newlines
+  /// and required spacing correctly for plain text rendering.
+  ///
+  /// Newlines are added if:
+  /// - the line ends with 2 spaces and a newline follows
+  /// - the line ends with punctuation that indicates a line break (.:,;!?)
+  /// - the next line starts with a hard line break indicator (-@>#`\\ or a
+  ///   digit followed by '.' or ')'), ignoring leading whitespace.
+  ///
+  /// Otherwise, newlines in the input are replaced with a single space.
+  ///
+  /// Multiple spaces are collapsed into a single space.
+  ///
+  /// Lines containing only whitespace are ignored.
+  ///
+  /// This newline handling is only used when the client requests plain
+  /// text for hover/signature help content.
+  /// Therefore with this approach we mimic the behavior of markdown rendering
+  /// for these clients.
+  ///
+  /// \param OS The stream to render to.
+  /// \param ParagraphText The text of the paragraph to render.
+  void renderNewlinesPlaintext(llvm::raw_ostream &OS,
+                               llvm::StringRef ParagraphText) const;
 };
 
 /// Represents a sequence of one or more documents. Knows how to print them in a
diff --git a/clang-tools-extra/clangd/unittests/HoverTests.cpp b/clang-tools-extra/clangd/unittests/HoverTests.cpp
index 718c1bc5f355a..eb858ff616e90 100644
--- a/clang-tools-extra/clangd/unittests/HoverTests.cpp
+++ b/clang-tools-extra/clangd/unittests/HoverTests.cpp
@@ -4087,16 +4087,16 @@ As well as warnings
 
 @brief brief doc
 
-longer doc
+longer doc  
 @note this is a note
-As you see, notes are "inlined".
+As you see, notes are "inlined".  
 @warning this is a warning
 
-As well as warnings
+As well as warnings  
-@param a this is a param
-@return it returns something
-@retval 0 if successful
+@param a this is a param  
+@return it returns something  
+@retval 0 if successful  
 @retval 1 if failed
 
 ---
@@ -4166,9 +4166,9 @@ As well as warnings)"},
 
 @brief brief doc
 
-longer doc
-@param a this is a param
-@param b does not exist
+longer doc  
+@param a this is a param  
+@param b does not exist  
 @return it returns something
 
 ---
@@ -4315,19 +4315,19 @@ TEST(Hover, ParseDocumentation) {
       },
       {
           "foo.\nbar",
-          "foo.\nbar",
-          "foo.\nbar",
+          "foo.  \nbar",
+          "foo.  \nbar",
           "foo.\nbar",
       },
      {
           "foo. \nbar",
-          "foo. \nbar",
-          "foo. \nbar",
+          "foo.   \nbar",
+          "foo.   \nbar",
           "foo.\nbar",
       },
       {
           "foo\n*bar",
-          "foo\n\\*bar",
+          "foo  \n\\*bar",
           "foo\n*bar",
           "foo\n*bar",
       },
@@ -4354,6 +4354,24 @@ TEST(Hover, ParseDocumentation) {
           "\\`not\nparsed\\`",
           "`not\nparsed`",
           "`not parsed`",
+      },
+      {
+          R"(@brief this is a typical use case
+@param x this is x
+\param y this is y
+@return something)",
+          R"(@brief this is a typical use case  
+@param x this is x  
+\\param y this is y  
+@return something)",
+          R"(@brief this is a typical use case  
+@param x this is x  
+\param y this is y  
+@return something)",
+          R"(@brief this is a typical use case
+@param x this is x
+\param y this is y
+@return something)",
       }};
 
   for (const auto &C : Cases) {
diff --git a/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp b/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp
index b3185cc10dd5a..676f7dfc74483 100644
--- a/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp
@@ -195,10 +195,10 @@ More description documentation)",
 normal textthis is an italic text this is a code block)",
       R"(\this is a bold text\
-normal text\this is an italic text\
+normal text\this is an italic text\  
 \this is a code block\)",
       R"(\this is a bold text\
-normal text\this is an italic text\
+normal text\this is an italic text\  
 \this is a code block\)",
       "this is a bold text normal textthis is an italic text "
       "this is a code block",
@@ -712,10 +712,10 @@ TEST(SymbolDocumentation, MarkdownCodeSpans) {
 line \c span`)",
       R"(\`multi
-line
+line  
 \\c span\`)",
       R"(`multi
-line
+line  
 \c span`)",
       R"(`multi line \c span`)"},
diff --git a/clang-tools-extra/clangd/unittests/support/MarkupTests.cpp b/clang-tools-extra/clangd/unittests/support/MarkupTests.cpp
index 5f91f31557176..af4782c07ae52 100644
--- a/clang-tools-extra/clangd/unittests/support/MarkupTests.cpp
+++ b/clang-tools-extra/clangd/unittests/support/MarkupTests.cpp
@@ -304,9 +304,9 @@ TEST(Paragraph, SeparationOfChunks) {
   P.appendSpace().appendCode("code").appendText(".\n newline");
 
   EXPECT_EQ(P.asEscapedMarkdown(),
-            "after `foobar` bat`no` `space` text `code`.\n newline");
+            "after `foobar` bat`no` `space` text `code`.  \n newline");
   EXPECT_EQ(P.asMarkdown(),
-            "after `foobar` bat`no` `space` text `code`.\n newline");
+            "after `foobar` bat`no` `space` text `code`.  \n newline");
   EXPECT_EQ(P.asPlainText(), "after foobar batno space text code.\nnewline");
 }
 
@@ -371,21 +371,117 @@ TEST(Paragraph, SeparationOfChunks3) {
   EXPECT_EQ(P.asPlainText(), "after\nfoobar");
 
   P.appendText("- bat\n");
-  EXPECT_EQ(P.asEscapedMarkdown(), "after  \n  foobar\n\\- bat");
+  EXPECT_EQ(P.asEscapedMarkdown(), "after  \n  foobar  \n\\- bat");
   EXPECT_EQ(P.asMarkdown(), "after  \n  foobar\n- bat");
   EXPECT_EQ(P.asPlainText(), "after\nfoobar\n- bat");
 
   P.appendText("- baz");
-  EXPECT_EQ(P.asEscapedMarkdown(), "after  \n  foobar\n\\- bat\n\\- baz");
+  EXPECT_EQ(P.asEscapedMarkdown(), "after  \n  foobar  \n\\- bat  \n\\- baz");
   EXPECT_EQ(P.asMarkdown(), "after  \n  foobar\n- bat\n- baz");
   EXPECT_EQ(P.asPlainText(), "after\nfoobar\n- bat\n- baz");
 
   P.appendText(" faz ");
-  EXPECT_EQ(P.asEscapedMarkdown(), "after  \n  foobar\n\\- bat\n\\- baz faz");
+  EXPECT_EQ(P.asEscapedMarkdown(),
+            "after  \n  foobar  \n\\- bat  \n\\- baz faz");
   EXPECT_EQ(P.asMarkdown(), "after  \n  foobar\n- bat\n- baz faz");
   EXPECT_EQ(P.asPlainText(), "after\nfoobar\n- bat\n- baz faz");
 }
 
+TEST(Paragraph, PunctuationLineBreaks) {
+
+  struct {
+    std::string Text;
+    std::string EscapedMarkdown;
+    std::string Markdown;
+    std::string PlainText;
+  } Cases[] = {
+      {"Line ending with dot.\nForces a visual linebreak.",
+       "Line ending with dot.  \nForces a visual linebreak.",
+       "Line ending with dot.  \nForces a visual linebreak.",
+       "Line ending with dot.\nForces a visual linebreak."},
+      {"Line ending with colon:\nForces a visual linebreak.",
+       "Line ending with colon:  \nForces a visual linebreak.",
+       "Line ending with colon:  \nForces a visual linebreak.",
+       "Line ending with colon:\nForces a visual linebreak."},
+      {"Line ending with semicolon;\nForces a visual linebreak.",
+       "Line ending with semicolon;  \nForces a visual linebreak.",
+       "Line ending with semicolon;  \nForces a visual linebreak.",
+       "Line ending with semicolon;\nForces a visual linebreak."},
+      {"Line ending with comma,\nForces a visual linebreak.",
+       "Line ending with comma,  \nForces a visual linebreak.",
+       "Line ending with comma,  \nForces a visual linebreak.",
+       "Line ending with comma,\nForces a visual linebreak."},
+      {"Line ending with exclamation mark!\nForces a visual linebreak.",
+       "Line ending with exclamation mark!  \nForces a visual linebreak.",
+       "Line ending with exclamation mark!  \nForces a visual linebreak.",
+       "Line ending with exclamation mark!\nForces a visual linebreak."},
+      {"Line ending with question mark?\nForces a visual linebreak.",
+       "Line ending with question mark?  \nForces a visual linebreak.",
+       "Line ending with question mark?  \nForces a visual linebreak.",
+       "Line ending with question mark?\nForces a visual linebreak."},
+  };
+
+  for (const auto &C : Cases) {
+    Paragraph P;
+    P.appendText(C.Text);
+    EXPECT_EQ(P.asEscapedMarkdown(), C.EscapedMarkdown);
+    EXPECT_EQ(P.asMarkdown(), C.Markdown);
+    EXPECT_EQ(P.asPlainText(), C.PlainText);
+  }
+}
+
+TEST(Paragraph, LineBreakIndicators) {
+
+  struct {
+    std::string Text;
+    std::string EscapedMarkdown;
+    std::string Markdown;
+    std::string PlainText;
+  } Cases[] = {
+      {"Visual linebreak for\n- list items\n- and so on",
+       "Visual linebreak for  \n\\- list items  \n\\- and so on",
+       "Visual linebreak for\n- list items\n- and so on",
+       "Visual linebreak for\n- list items\n- and so on"},
+      {"Visual linebreak for\n* list items\n* and so on",
+       "Visual linebreak for  \n\\* list items  \n\\* and so on",
+       "Visual linebreak for\n* list items\n* and so on",
+       "Visual linebreak for\n* list items\n* and so on"},
+      {"Visual linebreak for\n@command any doxygen command\n\\other other "
+       "doxygen command",
+       "Visual linebreak for  \n@command any doxygen command  \n\\\\other "
+       "other doxygen command",
+       "Visual linebreak for  \n@command any doxygen command  \n\\other other "
+       "doxygen command",
+       "Visual linebreak for\n@command any doxygen command\n\\other other "
+       "doxygen command"},
+      {"Visual linebreak for\n>blockquote line 1\n> blockquote line 2",
+       "Visual linebreak for  \n\\>blockquote line 1  \n\\> blockquote line "
+       "2",
+       "Visual linebreak for\n>blockquote line 1\n> blockquote line 2",
+       "Visual linebreak for\n>blockquote line 1\n> blockquote line 2"},
+      {"Visual linebreak for\n# Heading 1\ntext under heading\n## Heading "
+       "2\ntext under heading 2",
+       "Visual linebreak for  \n\\# Heading 1\ntext under heading  \n\\## "
+       "Heading 2\ntext under heading 2",
+       "Visual linebreak for\n# Heading 1\ntext under heading\n## Heading "
+       "2\ntext under heading 2",
+       "Visual linebreak for\n# Heading 1 text under heading\n## Heading 2 "
+       "text under heading 2"},
+      {"Visual linebreak for\n`inline code`",
+       "Visual linebreak for  \n\\`inline code\\`",
+       "Visual linebreak for\n`inline code`",
+       "Visual linebreak for\n`inline code`"},
+  };
+
+  for (const auto &C : Cases) {
+    Paragraph P;
+    P.appendText(C.Text);
+    EXPECT_EQ(P.asEscapedMarkdown(), C.EscapedMarkdown);
+    EXPECT_EQ(P.asMarkdown(), C.Markdown);
+    EXPECT_EQ(P.asPlainText(), C.PlainText);
+  }
+}
+
 TEST(Paragraph, ExtraSpaces) {
   // Make sure spaces inside chunks are preserved for markdown
   // and dropped for plain text.
diff --git a/clang/test/CodeGen/AArch64/neon-across.c b/clang/test/CodeGen/AArch64/neon-across.c
index d365975593559..aa0387d89dfef 100644
--- a/clang/test/CodeGen/AArch64/neon-across.c
+++ b/clang/test/CodeGen/AArch64/neon-across.c
@@ -49,7 +49,7 @@ uint32_t test_vaddlv_u16(uint16x4_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddlvq_s8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> [[A]])
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
@@ -60,7 +60,7 @@ int16_t test_vaddlvq_s8(int8x16_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddlvq_s16
-// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> [[A]])
 // CHECK-NEXT:    ret i32 [[VADDLV_I]]
@@ -70,7 +70,7 @@ int32_t test_vaddlvq_s16(int16x8_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddlvq_s32
-// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VADDLVQ_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i64 [[VADDLVQ_S32_I]]
@@ -80,7 +80,7 @@ int64_t test_vaddlvq_s32(int32x4_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddlvq_u8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> [[A]])
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
@@ -91,7 +91,7 @@ uint16_t test_vaddlvq_u8(uint8x16_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddlvq_u16
-// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> [[A]])
 // CHECK-NEXT:    ret i32 [[VADDLV_I]]
@@ -101,7 +101,7 @@ uint32_t test_vaddlvq_u16(uint16x8_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddlvq_u32
-// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VADDLVQ_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i64 [[VADDLVQ_U32_I]]
@@ -155,7 +155,7 @@ uint16_t test_vmaxv_u16(uint16x4_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_s8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> [[A]])
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
@@ -166,7 +166,7 @@ int8_t test_vmaxvq_s8(int8x16_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_s16
-// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[A]])
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16
@@ -177,7 +177,7 @@ int16_t test_vmaxvq_s16(int16x8_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_s32
-// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VMAXVQ_S32_I]]
@@ -187,7 +187,7 @@ int32_t test_vmaxvq_s32(int32x4_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_u8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> [[A]])
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
@@ -198,7 +198,7 @@ uint8_t test_vmaxvq_u8(uint8x16_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_u16
-// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> [[A]])
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16
@@ -209,7 +209,7 @@ uint16_t test_vmaxvq_u16(uint16x8_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_u32
-// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VMAXVQ_U32_I]]
@@ -263,7 +263,7 @@ uint16_t test_vminv_u16(uint16x4_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vminvq_s8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> [[A]])
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
@@ -274,7 +274,7 @@ int8_t test_vminvq_s8(int8x16_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vminvq_s16
-// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> [[A]])
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16
@@ -285,7 +285,7 @@ int16_t test_vminvq_s16(int16x8_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vminvq_s32
-// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VMINVQ_S32_I]]
@@ -295,7 +295,7 @@ int32_t test_vminvq_s32(int32x4_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vminvq_u8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> [[A]])
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
@@ -306,7 +306,7 @@ uint8_t test_vminvq_u8(uint8x16_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vminvq_u16
-// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> [[A]])
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16
@@ -317,7 +317,7 @@ uint16_t test_vminvq_u16(uint16x8_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vminvq_u32
-// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VMINVQ_U32_I]]
@@ -371,7 +371,7 @@ uint16_t test_vaddv_u16(uint16x4_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_s8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> [[A]])
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
@@ -382,7 +382,7 @@ int8_t test_vaddvq_s8(int8x16_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_s16
-// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> [[A]])
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16
@@ -393,7 +393,7 @@ int16_t test_vaddvq_s16(int16x8_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_s32
-// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VADDVQ_S32_I]]
@@ -403,7 +403,7 @@ int32_t test_vaddvq_s32(int32x4_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_u8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> [[A]])
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
@@ -414,7 +414,7 @@ uint8_t test_vaddvq_u8(uint8x16_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_u16
-// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> [[A]])
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16
@@ -425,7 +425,7 @@ uint16_t test_vaddvq_u16(uint16x8_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_u32
-// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VADDVQ_U32_I]]
@@ -435,7 +435,7 @@ uint32_t test_vaddvq_u32(uint32x4_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_f32
-// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VMAXVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> [[A]])
 // CHECK-NEXT:    ret float [[VMAXVQ_F32_I]]
@@ -445,7 +445,7 @@ float32_t test_vmaxvq_f32(float32x4_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vminvq_f32
-// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VMINVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> [[A]])
 // CHECK-NEXT:    ret float [[VMINVQ_F32_I]]
@@ -455,7 +455,7 @@ float32_t test_vminvq_f32(float32x4_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxnmvq_f32
-// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VMAXNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> [[A]])
 // CHECK-NEXT:    ret float [[VMAXNMVQ_F32_I]]
@@ -465,7 +465,7 @@ float32_t test_vmaxnmvq_f32(float32x4_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vminnmvq_f32
-// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VMINNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> [[A]])
 // CHECK-NEXT:    ret float [[VMINNMVQ_F32_I]]
diff --git a/compiler-rt/lib/nsan/tests/NSanUnitTest.cpp b/compiler-rt/lib/nsan/tests/NSanUnitTest.cpp
index 73b59671fe07a..d121292c36682 100644
--- a/compiler-rt/lib/nsan/tests/NSanUnitTest.cpp
+++ b/compiler-rt/lib/nsan/tests/NSanUnitTest.cpp
@@ -43,8 +43,8 @@ template <typename FT, auto next> void TestFT() {
   ASSERT_EQ(GetULPDiff(-X, -Y), 3);
 
   // Values with larger differences.
-  static constexpr const __uint128_t MantissaSize =
-      __uint128_t{1} << FTInfo<FT>::kMantissaBits;
+  static constexpr const __sanitizer::u64 MantissaSize =
+      __sanitizer::u64{1} << FTInfo<FT>::kMantissaBits;
   ASSERT_EQ(GetULPDiff(1.0, next(2.0, 1.0)), MantissaSize - 1);
   ASSERT_EQ(GetULPDiff(1.0, 2.0), MantissaSize);
   ASSERT_EQ(GetULPDiff(1.0, next(2.0, 3.0)), MantissaSize + 1);
@@ -57,6 +57,11 @@ TEST(NSanTest, Double) {
   TestFT<double, static_cast<double (*)(double, double)>(nextafter)>();
 }
 
-TEST(NSanTest, Float128) { TestFT<__float128, nextafterf128>(); }
+TEST(NSanTest, Float128) {
+  // Very basic tests. FIXME: improve when we have nextafter<__float128>.
+  ASSERT_EQ(GetULPDiff<__float128>(0.0, 0.0), 0);
+  ASSERT_EQ(GetULPDiff<__float128>(-0.0, 0.0), 0);
+  ASSERT_NE(GetULPDiff<__float128>(-0.01, 0.01), kMaxULPDiff);
+}
 
 } // end namespace __nsan
diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp
index 62ab0554df08e..7fa5e017d3985 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp
@@ -259,7 +259,9 @@ void InitializePlatform() {
 
   ThreadEventCallbacks callbacks = {
       .create = ThreadCreateCallback,
+      .start = nullptr,
      .terminate = ThreadTerminateCallback,
+      .destroy = nullptr,
  };
   InstallPthreadIntrospectionHook(callbacks);
 #endif
diff --git a/compiler-rt/test/fuzzer/fuzzer-ubsan.test b/compiler-rt/test/fuzzer/fuzzer-ubsan.test
index d22339d72e261..6bc2c38636688 100644
--- a/compiler-rt/test/fuzzer/fuzzer-ubsan.test
+++ b/compiler-rt/test/fuzzer/fuzzer-ubsan.test
@@ -1,6 +1,3 @@
-// This test currently fails to compile on green.lab.llvm.org (arm)
-// XFAIL: system-darwin && target=arm{{.*}}
-
 RUN: %cpp_compiler -fsanitize=undefined -fno-sanitize-recover=all %S/SignedIntOverflowTest.cpp -o %t-SignedIntOverflowTest-Ubsan
 RUN: not %run %t-SignedIntOverflowTest-Ubsan 2>&1 | FileCheck %s
 CHECK: runtime error: signed integer overflow: 2147483647 + 1 cannot be represented in type 'int'
diff --git a/compiler-rt/test/fuzzer/reduce_inputs.test b/compiler-rt/test/fuzzer/reduce_inputs.test
index e65f572277297..d296fa42191af 100644
--- a/compiler-rt/test/fuzzer/reduce_inputs.test
+++ b/compiler-rt/test/fuzzer/reduce_inputs.test
@@ -12,5 +12,5 @@ RUN: %run %t-ShrinkControlFlowSimpleTest -runs=0 %t/C 2>&1 | FileCheck %s --chec
 COUNT: seed corpus: files: 4
 
 # a bit longer test
-RUN: %run %t-ShrinkControlFlowTest -exit_on_item=0eb8e4ed029b774d80f2b66408203801cb982a60 -seed=42 -runs=1000000 2>&1 | FileCheck %s
+RUN: %run %t-ShrinkControlFlowTest -exit_on_item=0eb8e4ed029b774d80f2b66408203801cb982a60 -seed=42 -runs=10000000 2>&1 | FileCheck %s
diff --git a/libcxx/docs/ReleaseNotes/22.rst b/libcxx/docs/ReleaseNotes/22.rst
index 25d33a9c2eb50..980390c4fe3d7 100644
--- a/libcxx/docs/ReleaseNotes/22.rst
+++ b/libcxx/docs/ReleaseNotes/22.rst
@@ -76,8 +76,9 @@ Improvements and New Features
 - The ``std::{fill, fill_n}`` and ``std::ranges::{fill, fill_n}`` algorithms have been optimized for segmented
   iterators, resulting in a performance improvement of at least 10x for ``std::deque`` iterators and
   ``std::join_view<std::vector<std::vector<char>>>`` iterators.
 
-- The ``std::generate`` and ``std::generate_n`` algorithms have been optimized for segmented iterators, resulting in a
-  performance improvement for ``std::deque`` and ``std::join_view<std::vector<std::vector<char>>>`` iterators.
+- The ``std::{generate, generate_n}`` and ``std::ranges::generate_n`` algorithms have been optimized for segmented
+  iterators, resulting in a performance improvement for ``std::deque`` and
+  ``std::join_view<std::vector<std::vector<char>>>`` iterators.
 
 Deprecations and Removals
 -------------------------
diff --git a/libcxx/include/__algorithm/generate_n.h b/libcxx/include/__algorithm/generate_n.h
index e9da133f0570a..23899e49e0b65 100644
--- a/libcxx/include/__algorithm/generate_n.h
+++ b/libcxx/include/__algorithm/generate_n.h
@@ -13,22 +13,34 @@
 #include <__config>
 #include <__functional/identity.h>
 #include <__utility/forward.h>
+#include <__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
 
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 template <class _OutputIterator, class _Size, class _Generator>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
-generate_n(_OutputIterator __first, _Size __orig_n, _Generator __gen) {
+__generate_n(_OutputIterator __first, _Size __orig_n, _Generator& __gen) {
   using __iter_ref = decltype(*__first);
   __identity __proj;
   auto __f = [&](__iter_ref __element) { std::forward<__iter_ref>(__element) = __gen(); };
-  return std::__for_each_n(__first, __orig_n, __f, __proj);
+  return std::__for_each_n(std::move(__first), __orig_n, __f, __proj);
+}
+
+template <class _OutputIterator, class _Size, class _Generator>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
+generate_n(_OutputIterator __first, _Size __orig_n, _Generator __gen) {
+  return std::__generate_n(std::move(__first), __orig_n, __gen);
 }
 
 _LIBCPP_END_NAMESPACE_STD
 
+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___ALGORITHM_GENERATE_N_H
diff --git a/libcxx/include/__algorithm/ranges_generate_n.h b/libcxx/include/__algorithm/ranges_generate_n.h
index a318994d0eaf8..0cc9ce7b1193b 100644
--- a/libcxx/include/__algorithm/ranges_generate_n.h
+++ b/libcxx/include/__algorithm/ranges_generate_n.h
@@ -9,6 +9,7 @@
 #ifndef _LIBCPP___ALGORITHM_RANGES_GENERATE_N_H
 #define _LIBCPP___ALGORITHM_RANGES_GENERATE_N_H
 
+#include <__algorithm/generate_n.h>
 #include <__concepts/constructible.h>
 #include <__concepts/invocable.h>
 #include <__config>
@@ -38,12 +39,7 @@ struct __generate_n {
     requires invocable<_Func&> && indirectly_writable<_OutIter, invoke_result_t<_Func&>>
   _LIBCPP_HIDE_FROM_ABI constexpr _OutIter
   operator()(_OutIter __first, iter_difference_t<_OutIter> __n, _Func __gen) const {
-    for (; __n > 0; --__n) {
-      *__first = __gen();
-      ++__first;
-    }
-
-    return __first;
+    return std::__generate_n(std::move(__first), __n, __gen);
   }
 };
diff --git a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules
index e72ffd1f030ec..09939e29e5b75 100644
--- a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules
+++ b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules
@@ -386,7 +386,9 @@ ifeq (,$(filter 1, $(USE_LIBSTDCPP) $(USE_LIBCPP) $(USE_SYSTEM_STDLIB)))
 		ifneq "$(LIBCPP_INCLUDE_TARGET_DIR)" ""
 			CXXFLAGS += -cxx-isystem $(LIBCPP_INCLUDE_TARGET_DIR)
 		endif
-		LDFLAGS += -L$(LIBCPP_LIBRARY_DIR) -Wl,-rpath,$(LIBCPP_LIBRARY_DIR) -lc++
+
+		# If `-nostdlib++` is not passed, clang will link to the system's stdlib.
+		LDFLAGS += -nostdlib++ -L$(LIBCPP_LIBRARY_DIR) -Wl,-rpath,$(LIBCPP_LIBRARY_DIR) -lc++
 	else
 		USE_SYSTEM_STDLIB := 1
 	endif
@@ -407,7 +409,8 @@ ifeq (1,$(USE_LIBCPP))
 			ifneq "$(LIBCPP_INCLUDE_TARGET_DIR)" ""
 				CXXFLAGS += -cxx-isystem $(LIBCPP_INCLUDE_TARGET_DIR)
 			endif
-			LDFLAGS += -L$(LIBCPP_LIBRARY_DIR) -Wl,-rpath,$(LIBCPP_LIBRARY_DIR) -lc++
+			# If `-nostdlib++` is not passed, clang will link to the system's stdlib.
+			LDFLAGS += -nostdlib++ -L$(LIBCPP_LIBRARY_DIR) -Wl,-rpath,$(LIBCPP_LIBRARY_DIR) -lc++
 		else
 			ifeq "$(OS)" "Android"
 				# Nothing to do, this is already handled in
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 76b6c8ec68c72..e8dbc964a943e 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -594,12 +594,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
 
     // Check if suitable for a bit test
     if (N <= DL.getIndexSizeInBits(0u)) {
-      SmallPtrSet<const BasicBlock *, 4> Dests;
-      for (auto I : SI.cases())
-        Dests.insert(I.getCaseSuccessor());
+      DenseMap<const BasicBlock *, unsigned> DestMap;
+      for (auto I : SI.cases()) {
+        const BasicBlock *BB = I.getCaseSuccessor();
+        ++DestMap[BB];
+      }
 
-      if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
-                                     DL))
+      if (TLI->isSuitableForBitTests(DestMap, MinCaseVal, MaxCaseVal, DL))
         return 1;
     }
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index d6ed3a8f739b3..4058dd728e5d1 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1433,9 +1433,9 @@ class LLVM_ABI TargetLoweringBase {
   /// \p High as its lowest and highest case values, and expects \p NumCmps
   /// case value comparisons. Check if the number of destinations, comparison
   /// metric, and range are all suitable.
-  bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
-                             const APInt &Low, const APInt &High,
-                             const DataLayout &DL) const {
+  bool isSuitableForBitTests(
+      const DenseMap<const BasicBlock *, unsigned> &DestCmps,
+      const APInt &Low, const APInt &High, const DataLayout &DL) const {
     // FIXME: I don't think NumCmps is the correct metric: a single case and a
     // range of cases both require only one branch to lower. Just looking at the
     // number of clusters and destinations should be enough to decide whether to
     // build bit tests.
 
     if (!rangeFitsInWord(Low, High, DL))
       return false;
 
+    unsigned NumDests = DestCmps.size();
+    unsigned NumCmps = 0;
+    unsigned MaxBitTestEntry = 0;
+    for (auto &DestCmp : DestCmps) {
+      NumCmps += DestCmp.second;
+      if (DestCmp.second > MaxBitTestEntry)
+        MaxBitTestEntry = DestCmp.second;
+    }
+
+    // Plain comparisons might be cheaper when each destination only needs a
+    // few of them; the threshold is target-specific.
+    if (MaxBitTestEntry < getMinimumBitTestCmps())
+      return false;
+
     // Decide whether it's profitable to lower this range with bit tests. Each
     // destination requires a bit test and branch, and there is an overall range
     // check branch. For a small number of clusters, separate comparisons might
@@ -2055,6 +2069,9 @@ class LLVM_ABI TargetLoweringBase {
 
   virtual bool isJumpTableRelative() const;
 
+  /// Return the minimum that the largest per-destination comparison count
+  /// must reach before a bit test is used.
+  unsigned getMinimumBitTestCmps() const;
+
   /// If a physical register, this specifies the register that
   /// llvm.savestack/llvm.restorestack should save and restore.
   Register getStackPointerRegisterToSaveRestore() const {
@@ -2577,6 +2594,9 @@ class LLVM_ABI TargetLoweringBase {
   /// Set to zero to generate unlimited jump tables.
   void setMaximumJumpTableSize(unsigned);
 
+  /// Set the minimum that the largest per-destination comparison count must
+  /// reach to generate a bit test.
+  void setMinimumBitTestCmps(unsigned Val);
+
   /// If set to a physical register, this specifies the register that
   /// llvm.savestack/llvm.restorestack should save and restore.
   void setStackPointerRegisterToSaveRestore(Register R) {
@@ -3719,6 +3739,9 @@ class LLVM_ABI TargetLoweringBase {
   /// backend supports.
   unsigned MinCmpXchgSizeInBits;
 
+  /// The minimum largest per-destination comparison count required to use
+  /// bit tests for a switch.
+  unsigned MinimumBitTestCmps;
+
   /// This indicates if the target supports unaligned atomic operations.
   bool SupportsUnalignedAtomics;
diff --git a/llvm/include/llvm/IR/AbstractCallSite.h b/llvm/include/llvm/IR/AbstractCallSite.h
index 9e24ae7d1b431..f431e1d8a38ef 100644
--- a/llvm/include/llvm/IR/AbstractCallSite.h
+++ b/llvm/include/llvm/IR/AbstractCallSite.h
@@ -137,7 +137,7 @@ class AbstractCallSite {
 
   /// Return true if @p U is the use that defines the callee of this ACS.
   bool isCallee(const Use *U) const {
-    if (isDirectCall())
+    if (!isCallbackCall())
       return CB->isCallee(U);
 
     assert(!CI.ParameterEncoding.empty() &&
@@ -154,7 +154,7 @@ class AbstractCallSite {
 
   /// Return the number of parameters of the callee.
   unsigned getNumArgOperands() const {
-    if (isDirectCall())
+    if (!isCallbackCall())
       return CB->arg_size();
     // Subtract 1 for the callee encoding.
     return CI.ParameterEncoding.size() - 1;
@@ -169,7 +169,7 @@ class AbstractCallSite {
   /// Return the operand index of the underlying instruction associated with
   /// the function parameter number @p ArgNo or -1 if there is none.
   int getCallArgOperandNo(unsigned ArgNo) const {
-    if (isDirectCall())
+    if (!isCallbackCall())
       return ArgNo;
     // Add 1 for the callee encoding.
     return CI.ParameterEncoding[ArgNo + 1];
@@ -183,7 +183,7 @@ class AbstractCallSite {
   /// Return the operand of the underlying instruction associated with the
   /// function parameter number @p ArgNo or nullptr if there is none.
   Value *getCallArgOperand(unsigned ArgNo) const {
-    if (isDirectCall())
+    if (!isCallbackCall())
      return CB->getArgOperand(ArgNo);
     // Add 1 for the callee encoding.
     return CI.ParameterEncoding[ArgNo + 1] >= 0
@@ -210,7 +210,7 @@ class AbstractCallSite {
 
   /// Return the pointer to function that is being called.
   Value *getCalledOperand() const {
-    if (isDirectCall())
+    if (!isCallbackCall())
       return CB->getCalledOperand();
     return CB->getArgOperand(getCallArgOperandNoForCallee());
   }
diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index 038c499fe236e..3fa8243c03423 100644
--- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -198,7 +198,6 @@ bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
   assert(First <= Last);
 
   auto Prob = BranchProbability::getZero();
-  unsigned NumCmps = 0;
   std::vector<MachineBasicBlock *> Table;
   DenseMap<MachineBasicBlock *, BranchProbability> JTProbs;
 
@@ -206,12 +205,16 @@ bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
   for (unsigned I = First; I <= Last; ++I)
     JTProbs[Clusters[I].MBB] = BranchProbability::getZero();
 
+  DenseMap<const BasicBlock *, unsigned> DestMap;
   for (unsigned I = First; I <= Last; ++I) {
     assert(Clusters[I].Kind == CC_Range);
     Prob += Clusters[I].Prob;
     const APInt &Low = Clusters[I].Low->getValue();
     const APInt &High = Clusters[I].High->getValue();
-    NumCmps += (Low == High) ? 1 : 2;
+    unsigned NumCmp = (Low == High) ? 1 : 2;
+    const BasicBlock *BB = Clusters[I].MBB->getBasicBlock();
+    DestMap[BB] += NumCmp;
+
     if (I != First) {
       // Fill the gap between this and the previous cluster.
       const APInt &PreviousHigh = Clusters[I - 1].High->getValue();
@@ -226,9 +229,7 @@ bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
     JTProbs[Clusters[I].MBB] += Clusters[I].Prob;
   }
 
-  unsigned NumDests = JTProbs.size();
-  if (TLI->isSuitableForBitTests(NumDests, NumCmps,
-                                 Clusters[First].Low->getValue(),
+  if (TLI->isSuitableForBitTests(DestMap, Clusters[First].Low->getValue(),
                                  Clusters[Last].High->getValue(), *DL)) {
     // Clusters[First..Last] should be lowered as bit tests instead.
     return false;
   }
@@ -372,20 +373,19 @@ bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters,
   if (First == Last)
     return false;
 
-  BitVector Dests(FuncInfo.MF->getNumBlockIDs());
-  unsigned NumCmps = 0;
+  DenseMap<const BasicBlock *, unsigned> DestMap;
   for (int64_t I = First; I <= Last; ++I) {
     assert(Clusters[I].Kind == CC_Range);
-    Dests.set(Clusters[I].MBB->getNumber());
-    NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
+    unsigned NumCmp = (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
+    const BasicBlock *BB = Clusters[I].MBB->getBasicBlock();
+    DestMap[BB] += NumCmp;
   }
-  unsigned NumDests = Dests.count();
 
   APInt Low = Clusters[First].Low->getValue();
   APInt High = Clusters[Last].High->getValue();
   assert(Low.slt(High));
 
-  if (!TLI->isSuitableForBitTests(NumDests, NumCmps, Low, High, *DL))
+  if (!TLI->isSuitableForBitTests(DestMap, Low, High, *DL))
     return false;
 
   APInt LowBound;
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 59798b3cf201a..f3631fab885df 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
@@ -90,6 +91,11 @@ static cl::opt<unsigned> OptsizeJumpTableDensity(
     cl::desc("Minimum density for building a jump table in "
             "an optsize function"));
 
+static cl::opt<unsigned> MinimumBitTestCmpsOverride(
+    "min-bit-test-cmps", cl::init(2), cl::Hidden,
+    cl::desc("Minimum largest number of comparisons per switch destination "
+             "required to use a bit test."));
+
 // FIXME: This option is only to test if the strict fp operation processed
 // correctly by preventing mutating strict fp operation to normal fp operation
 // during development. When the backend supports strict float operation, this
@@ -719,6 +725,8 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
 
   MinCmpXchgSizeInBits = 0;
   SupportsUnalignedAtomics = false;
+
+  MinimumBitTestCmps = MinimumBitTestCmpsOverride;
 }
 
 // Define the virtual destructor out-of-line to act as a key method to anchor
@@ -2129,6 +2137,14 @@ bool TargetLoweringBase::isJumpTableRelative() const {
   return getTargetMachine().isPositionIndependent();
 }
 
+unsigned TargetLoweringBase::getMinimumBitTestCmps() const {
+  return MinimumBitTestCmps;
+}
+
+void TargetLoweringBase::setMinimumBitTestCmps(unsigned Val) {
+  MinimumBitTestCmps = Val;
+}
+
 Align TargetLoweringBase::getPrefLoopAlignment(MachineLoop *ML) const {
   if (TM.Options.LoopAlignment)
     return Align(TM.Options.LoopAlignment);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d16b11686e3c1..60aa61e993b26 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9028,11 +9028,12 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
   CallingConv::ID CallerCC = CallerF.getCallingConv();
 
   // SME Streaming functions are not eligible for TCO as they may require
-  // the streaming mode or ZA to be restored after returning from the call.
+  // the streaming mode or ZA/ZT0 to be restored after returning from the call.
   SMECallAttrs CallAttrs = getSMECallAttrs(CallerF, getRuntimeLibcallsInfo(), CLI);
   if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() ||
       CallAttrs.requiresPreservingAllZAState() ||
+      CallAttrs.requiresPreservingZT0() ||
       CallAttrs.caller().hasStreamingBody())
     return false;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 54c89721bc1f0..0573f64084d6f 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -1061,8 +1061,11 @@ HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
   SDValue W0 = isUndef(PredV)
                    ? DAG.getUNDEF(MVT::i64)
                    : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
-  Words[IdxW].push_back(HiHalf(W0, DAG));
-  Words[IdxW].push_back(LoHalf(W0, DAG));
+  if (Bytes < BitBytes) {
+    Words[IdxW].push_back(HiHalf(W0, DAG));
+    Words[IdxW].push_back(LoHalf(W0, DAG));
+  } else
+    Words[IdxW].push_back(W0);
 
   while (Bytes < BitBytes) {
     IdxW ^= 1;
@@ -1083,7 +1086,26 @@ HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
     Bytes *= 2;
   }
 
+  while (Bytes > BitBytes) {
+    IdxW ^= 1;
+    Words[IdxW].clear();
+
+    if (Bytes <= 4) {
+      for (const SDValue &W : Words[IdxW ^ 1]) {
+        SDValue T = contractPredicate(W, dl, DAG);
+        Words[IdxW].push_back(T);
+      }
+    } else {
+      for (const SDValue &W : Words[IdxW ^ 1]) {
+        Words[IdxW].push_back(W);
+      }
+    }
+    Bytes /= 2;
+  }
+
   assert(Bytes == BitBytes);
+  if (BitBytes == 1 && PredTy == MVT::v2i1)
+    ByteTy = MVT::getVectorVT(MVT::i16, HwLen);
 
   SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
   SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
@@ -3157,6 +3179,9 @@ SDValue
 HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
   auto *MemN = cast<MemSDNode>(Op.getNode());
 
+  if (!MemN->getMemoryVT().isSimple())
+    return Op;
+
   MVT MemTy = MemN->getMemoryVT().getSimpleVT();
   if (!isHvxPairTy(MemTy))
     return Op;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 17f04d0fd05e8..20fc849ea4aa5 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -138,6 +138,11 @@ static cl::opt<unsigned> PPCMinimumJumpTableEntries(
     "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
     cl::desc("Set minimum number of entries to use a jump table on PPC"));
 
+static cl::opt<unsigned> PPCMinimumBitTestCmps(
+    "ppc-min-bit-test-cmps", cl::init(3), cl::Hidden,
+    cl::desc("Minimum largest number of comparisons per switch destination "
+             "required to use a bit test on PPC."));
+
 static cl::opt<unsigned> PPCGatherAllAliasesMaxDepth(
     "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
     cl::desc("max depth when checking alias info in GatherAllAliases()"));
@@ -1436,6 +1441,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   // Re-evaluate this value on future HWs that can do better with mtctr.
   setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);
 
+  // The default minimum for the largest comparison count in a bit-test
+  // cluster on PPC is 3.
+  setMinimumBitTestCmps(PPCMinimumBitTestCmps);
+
   setMinFunctionAlignment(Align(4));
 
   setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 410f20edc6281..f514621094f13 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54634,6 +54634,7 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
                                const X86Subtarget &Subtarget) {
   EVT VT = N->getValueType(0);
   SDValue Src = N->getOperand(0);
+  EVT SrcVT = Src.getValueType();
   SDLoc DL(N);
 
   // Attempt to pre-truncate inputs to arithmetic ops instead.
@@ -54652,6 +54653,39 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = combinePMULH(Src, VT, DL, DAG, Subtarget))
     return V;
 
+  // Fold trunc(srl(load(p),amt)) -> load(p+amt/8)
+  // If we're shifting down byte aligned bit chunks from a larger load for
+  // truncation, see if we can convert the shift into a pointer offset instead.
+  // Limit this to normal (non-ext) scalar integer loads.
+  if (SrcVT.isScalarInteger() && Src.getOpcode() == ISD::SRL &&
+      Src.hasOneUse() && Src.getOperand(0).hasOneUse() &&
+      ISD::isNormalLoad(Src.getOperand(0).getNode())) {
+    auto *Ld = cast<LoadSDNode>(Src.getOperand(0));
+    if (Ld->isSimple() && VT.isByteSized() &&
+        isPowerOf2_64(VT.getSizeInBits())) {
+      SDValue ShAmt = Src.getOperand(1);
+      KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
+      // Check the shift amount is byte aligned.
+      // Check the truncation doesn't use any shifted in (zero) top bits.
+      if (KnownAmt.countMinTrailingZeros() >= 3 &&
+          KnownAmt.getMaxValue().ule(SrcVT.getSizeInBits() -
+                                     VT.getSizeInBits())) {
+        EVT PtrVT = Ld->getBasePtr().getValueType();
+        SDValue PtrBitOfs = DAG.getZExtOrTrunc(ShAmt, DL, PtrVT);
+        SDValue PtrByteOfs =
+            DAG.getNode(ISD::SRL, DL, PtrVT, PtrBitOfs,
+                        DAG.getShiftAmountConstant(3, PtrVT, DL));
+        SDValue NewPtr = DAG.getMemBasePlusOffset(
+            Ld->getBasePtr(), PtrByteOfs, DL, SDNodeFlags::NoUnsignedWrap);
+        SDValue NewLoad =
+            DAG.getLoad(VT, DL, Ld->getChain(), NewPtr, Ld->getMemOperand());
+        DAG.ReplaceAllUsesOfValueWith(Src.getOperand(0).getValue(1),
+                                      NewLoad.getValue(1));
+        return NewLoad;
+      }
+    }
+  }
+
   // The bitcast source is a direct mmx result.
   // Detect bitcasts between i32 to x86mmx
   if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 6d704e1dc893b..614c6ebd63be6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2148,7 +2148,7 @@ Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
   return nullptr;
 }

-Value *InstCombinerImpl::foldPtrToIntOfGEP(Type *IntTy, Value *Ptr) {
+Value *InstCombinerImpl::foldPtrToIntOrAddrOfGEP(Type *IntTy, Value *Ptr) {
   // Look through chain of one-use GEPs.
   Type *PtrTy = Ptr->getType();
   SmallVector<GEPOperator *> GEPs;
@@ -2210,7 +2210,7 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
       Mask->getType() == Ty)
     return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);

-  if (Value *V = foldPtrToIntOfGEP(Ty, SrcOp))
+  if (Value *V = foldPtrToIntOrAddrOfGEP(Ty, SrcOp))
     return replaceInstUsesWith(CI, V);

   Value *Vec, *Scalar, *Index;
@@ -2240,6 +2240,9 @@ Instruction *InstCombinerImpl::visitPtrToAddr(PtrToAddrInst &CI) {
       Mask->getType() == Ty)
     return BinaryOperator::CreateAnd(Builder.CreatePtrToAddr(Ptr), Mask);

+  if (Value *V = foldPtrToIntOrAddrOfGEP(Ty, SrcOp))
+    return replaceInstUsesWith(CI, V);
+
   // FIXME: Implement variants of ptrtoint folds.
   return commonCastTransforms(CI);
 }
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 9c75d9a6711b9..d85e4f7590197 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -700,7 +700,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   /// folded operation.
   void PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN);

-  Value *foldPtrToIntOfGEP(Type *IntTy, Value *Ptr);
+  Value *foldPtrToIntOrAddrOfGEP(Type *IntTy, Value *Ptr);
   Instruction *foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, CmpPredicate Cond,
                            Instruction &I);
   Instruction *foldSelectICmp(CmpPredicate Pred, SelectInst *SI, Value *RHS,
diff --git a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
index 2583a93e514a2..5b81f5dafe421 100644
--- a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
+++ b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
@@ -426,3 +426,21 @@ define void @zt0_multiple_private_za_calls(ptr %callee) "aarch64_in_zt0" nounwin
   call void %callee()
   ret void
 }
+
+define void @disable_tailcallopt(ptr %callee) "aarch64_inout_zt0" nounwind {
+; CHECK-COMMON-LABEL: disable_tailcallopt:
+; CHECK-COMMON:       // %bb.0:
+; CHECK-COMMON-NEXT:    sub sp, sp, #80
+; CHECK-COMMON-NEXT:    stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT:    mov x19, sp
+; CHECK-COMMON-NEXT:    str zt0, [x19]
+; CHECK-COMMON-NEXT:    smstop za
+; CHECK-COMMON-NEXT:    blr x0
+; CHECK-COMMON-NEXT:    smstart za
+; CHECK-COMMON-NEXT:    ldr zt0, [x19]
+; CHECK-COMMON-NEXT:    ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT:    add sp, sp, #80
+; CHECK-COMMON-NEXT:    ret
+  tail call void %callee()
+  ret void
+}
diff --git a/llvm/test/CodeGen/Hexagon/inst_masked_store_bug1.ll b/llvm/test/CodeGen/Hexagon/inst_masked_store_bug1.ll
new file mode 100644
index 0000000000000..fcf124699e8e7
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/inst_masked_store_bug1.ll
@@ -0,0 +1,94 @@
+;; REQUIRES: asserts
+;; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b %s -o - | FileCheck %s
+;; Sanity check for lowering masked scatter without assertion errors.
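+;;
+;; The essential shape being exercised is a masked scatter through a vector of
+;; pointers (a reduced sketch with illustrative names; the full reproducer
+;; follows):
+;;   %gep = getelementptr i8, <64 x ptr> %base, i32 %off
+;;   call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> %val, <64 x ptr> %gep,
+;;                                              i32 1, <64 x i1> %mask)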
+ +define void @outer_product(ptr %aptr, ptr %bptr, ptr %cptr, i32 %T, i32 %W) { +entry: + %W.ripple.bcast.splatinsert = insertelement <8 x i32> poison, i32 %W, i64 0 + %W.ripple.bcast.splat = shufflevector <8 x i32> %W.ripple.bcast.splatinsert, <8 x i32> poison, <8 x i32> zeroinitializer + %div1194 = lshr i32 %T, 3 + %cmp84.not = icmp ult i32 %T, 8 + br i1 %cmp84.not, label %for.end49, label %for.body.preheader + +for.body.preheader: ; preds = %entry + %div10195 = lshr i32 %W, 3 + %cmp1782.not = icmp ult i32 %W, 8 + %arrayidx27.ripple.LS.dim.slope = mul <8 x i32> %W.ripple.bcast.splat, + %arrayidx27.ripple.LS.dim.slope.ripple.bcast = shufflevector <8 x i32> %arrayidx27.ripple.LS.dim.slope, <8 x i32> poison, <64 x i32> + %arrayidx27.ripple.LS.slope = add <64 x i32> %arrayidx27.ripple.LS.dim.slope.ripple.bcast, + %invariant.gep196 = getelementptr i8, ptr %cptr, <64 x i32> %arrayidx27.ripple.LS.slope + br label %for.body + +for.body: ; preds = %for.end, %for.body.preheader + %ripple.par.iv.085 = phi i32 [ %add48, %for.end ], [ 0, %for.body.preheader ] + %mul2 = shl i32 %ripple.par.iv.085, 3 + br i1 %cmp1782.not, label %for.end, label %for.body18.lr.ph + +for.body18.lr.ph: ; preds = %for.body + %arrayidx = getelementptr inbounds nuw i8, ptr %aptr, i32 %mul2 + %mul25 = mul i32 %mul2, %W + %gep197 = getelementptr i8, <64 x ptr> %invariant.gep196, i32 %mul25 + br label %for.body18 + +for.body18: ; preds = %for.body18, %for.body18.lr.ph + %ripple.par.iv15.083 = phi i32 [ 0, %for.body18.lr.ph ], [ %add28, %for.body18 ] + %mul19 = shl i32 %ripple.par.iv15.083, 3 + %.ripple.LS.instance184 = load <8 x i8>, ptr %arrayidx, align 1 + %.ripple.LS.instance184.ripple.bcast = shufflevector <8 x i8> %.ripple.LS.instance184, <8 x i8> poison, <64 x i32> + %arrayidx21 = getelementptr inbounds nuw i8, ptr %bptr, i32 %mul19 + %.ripple.LS.instance = load <8 x i8>, ptr %arrayidx21, align 1 + %.ripple.LS.instance.ripple.bcast = shufflevector <8 x i8> %.ripple.LS.instance, <8 x i8> poison, <64 x i32> + %mul23.ripple.LS.instance = mul <64 x i8> %.ripple.LS.instance.ripple.bcast, %.ripple.LS.instance184.ripple.bcast + %gep = getelementptr i8, <64 x ptr> %gep197, i32 %mul19 + tail call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> %mul23.ripple.LS.instance, <64 x ptr> %gep, i32 1, <64 x i1> splat (i1 true)) + %add28 = add nuw i32 %ripple.par.iv15.083, 1 + %cmp17 = icmp ult i32 %add28, %div10195 + br i1 %cmp17, label %for.body18, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body18 + %0 = shl i32 %add28, 3 + br label %for.end + +for.end: ; preds = %for.end.loopexit, %for.body + %ripple.par.iv15.0.lcssa = phi i32 [ 0, %for.body ], [ %0, %for.end.loopexit ] + %add30.ripple.bcast.splatinsert = insertelement <8 x i32> poison, i32 %ripple.par.iv15.0.lcssa, i64 0 + %add30.ripple.bcast.splat = shufflevector <8 x i32> %add30.ripple.bcast.splatinsert, <8 x i32> poison, <8 x i32> zeroinitializer + %add30.ripple.LS.instance = or disjoint <8 x i32> %add30.ripple.bcast.splat, + %cmp32.ripple.LS.instance = icmp ne i32 %ripple.par.iv15.0.lcssa, %W + %cmp32.ripple.LS.instance.ripple.bcast.splatinsert = insertelement <8 x i1> poison, i1 %cmp32.ripple.LS.instance, i64 0 + %cmp32.ripple.LS.instance.ripple.bcast.splat = shufflevector <8 x i1> %cmp32.ripple.LS.instance.ripple.bcast.splatinsert, <8 x i1> poison, <8 x i32> zeroinitializer + %cmp33.ripple.vectorized = icmp ult <8 x i32> %add30.ripple.LS.instance, %W.ripple.bcast.splat + %or.cond.ripple.LS.instance = select <8 x i1> %cmp32.ripple.LS.instance.ripple.bcast.splat, 
<8 x i1> %cmp33.ripple.vectorized, <8 x i1> zeroinitializer + %or.cond.ripple.LS.instance.ripple.bcast = shufflevector <8 x i1> %or.cond.ripple.LS.instance, <8 x i1> poison, <64 x i32> + %or.cond.ripple.LS.instance.ripple.reducelog2.shuffle = shufflevector <8 x i1> %or.cond.ripple.LS.instance, <8 x i1> , <8 x i32> + %or.cond.ripple.LS.instance.ripple.reducelog2.operator = or <8 x i1> %or.cond.ripple.LS.instance, %or.cond.ripple.LS.instance.ripple.reducelog2.shuffle + %or.cond.ripple.LS.instance.ripple.reducelog2.shuffle189 = shufflevector <8 x i1> %or.cond.ripple.LS.instance.ripple.reducelog2.operator, <8 x i1> , <8 x i32> + %or.cond.ripple.LS.instance.ripple.reducelog2.operator190 = or <8 x i1> %or.cond.ripple.LS.instance.ripple.reducelog2.operator, %or.cond.ripple.LS.instance.ripple.reducelog2.shuffle189 + %or.cond.ripple.LS.instance.ripple.reducelog2.shuffle191 = shufflevector <8 x i1> %or.cond.ripple.LS.instance.ripple.reducelog2.operator190, <8 x i1> poison, <8 x i32> + %or.cond.ripple.LS.instance.ripple.reducelog2.operator192 = or <8 x i1> %or.cond.ripple.LS.instance.ripple.reducelog2.operator190, %or.cond.ripple.LS.instance.ripple.reducelog2.shuffle191 + %ripple.red.extract.ripple.bcast.splat = shufflevector <8 x i1> %or.cond.ripple.LS.instance.ripple.reducelog2.operator192, <8 x i1> poison, <8 x i32> zeroinitializer + %arrayidx34.ripple.branch.clone = getelementptr inbounds nuw i8, ptr %aptr, i32 %mul2 + %.ripple.LS.instance188.ripple.branch.clone.ripple.masked.load = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %arrayidx34.ripple.branch.clone, i32 1, <8 x i1> %ripple.red.extract.ripple.bcast.splat, <8 x i8> poison) + %.ripple.LS.instance188.ripple.bcast.ripple.branch.clone = shufflevector <8 x i8> %.ripple.LS.instance188.ripple.branch.clone.ripple.masked.load, <8 x i8> poison, <64 x i32> + %arrayidx36.ripple.branch.clone = getelementptr inbounds nuw i8, ptr %bptr, i32 %ripple.par.iv15.0.lcssa + %.ripple.LS.instance187.ripple.branch.clone.ripple.masked.load = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %arrayidx36.ripple.branch.clone, i32 1, <8 x i1> %or.cond.ripple.LS.instance, <8 x i8> poison) + %.ripple.LS.instance187.ripple.bcast.ripple.branch.clone = shufflevector <8 x i8> %.ripple.LS.instance187.ripple.branch.clone.ripple.masked.load, <8 x i8> poison, <64 x i32> + %mul38.ripple.LS.instance.ripple.branch.clone = mul <64 x i8> %.ripple.LS.instance187.ripple.bcast.ripple.branch.clone, %.ripple.LS.instance188.ripple.bcast.ripple.branch.clone + %mul40.ripple.branch.clone = mul i32 %mul2, %W + %1 = getelementptr i8, ptr %cptr, i32 %mul40.ripple.branch.clone + %arrayidx42.ripple.branch.clone = getelementptr i8, ptr %1, i32 %ripple.par.iv15.0.lcssa + %arrayidx42.ripple.LS.instance.ripple.branch.clone = getelementptr i8, ptr %arrayidx42.ripple.branch.clone, <64 x i32> %arrayidx27.ripple.LS.slope + tail call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> %mul38.ripple.LS.instance.ripple.branch.clone, <64 x ptr> %arrayidx42.ripple.LS.instance.ripple.branch.clone, i32 1, <64 x i1> %or.cond.ripple.LS.instance.ripple.bcast) + %add48 = add nuw i32 %ripple.par.iv.085, 1 + %cmp = icmp ult i32 %add48, %div1194 + br i1 %cmp, label %for.body, label %for.end49 + +for.end49: ; preds = %for.end, %entry + ret void +} + +;; CHECK: outer_product +;; CHECK: {{r[0-9]+}} = lsr({{r[0-9]+}},#3) +;; CHECK: {{q[0-9]+}} = vand({{v[0-9]+}},{{r[0-9]+}}) +;; CHECK: {{v[0-9]+}} = vmux(q0,{{v[0-9]+}},{{v[0-9]+}}) +;; CHECK: vmem{{.*}} = {{v[0-9]+}} diff --git a/llvm/test/CodeGen/PowerPC/bittest.ll 
b/llvm/test/CodeGen/PowerPC/bittest.ll new file mode 100644 index 0000000000000..cba56e3d5798f --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/bittest.ll @@ -0,0 +1,193 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -verify-machineinstrs < %s -O3 -mcpu=ppc -mtriple powerpc-ibm-aix \ +; RUN: -ppc-asm-full-reg-names | FileCheck %s + +define i32 @foo(i32 noundef signext %x) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stwu r1, -64(r1) +; CHECK-NEXT: stw r0, 72(r1) +; CHECK-NEXT: cmpwi r3, 8 +; CHECK-NEXT: stw r31, 60(r1) # 4-byte Folded Spill +; CHECK-NEXT: mr r31, r3 +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: ble cr0, L..BB0_4 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: cmpwi r31, 11 +; CHECK-NEXT: bge cr0, L..BB0_7 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: cmplwi r31, 9 +; CHECK-NEXT: beq cr0, L..BB0_9 +; CHECK-NEXT: # %bb.3: # %entry +; CHECK-NEXT: cmplwi r31, 10 +; CHECK-NEXT: beq cr0, L..BB0_11 +; CHECK-NEXT: b L..BB0_13 +; CHECK-NEXT: L..BB0_4: # %entry +; CHECK-NEXT: cmplwi r31, 4 +; CHECK-NEXT: beq cr0, L..BB0_12 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: cmplwi r31, 7 +; CHECK-NEXT: beq cr0, L..BB0_11 +; CHECK-NEXT: # %bb.6: # %entry +; CHECK-NEXT: cmplwi r31, 8 +; CHECK-NEXT: beq cr0, L..BB0_10 +; CHECK-NEXT: b L..BB0_13 +; CHECK-NEXT: L..BB0_7: # %entry +; CHECK-NEXT: beq cr0, L..BB0_10 +; CHECK-NEXT: # %bb.8: # %entry +; CHECK-NEXT: cmplwi r31, 12 +; CHECK-NEXT: bne cr0, L..BB0_13 +; CHECK-NEXT: L..BB0_9: # %sw.bb2 +; CHECK-NEXT: mr r3, r31 +; CHECK-NEXT: bl .foo3[PR] +; CHECK-NEXT: nop +; CHECK-NEXT: mr r3, r31 +; CHECK-NEXT: b L..BB0_13 +; CHECK-NEXT: L..BB0_10: # %sw.bb1 +; CHECK-NEXT: mr r3, r31 +; CHECK-NEXT: bl .foo2[PR] +; CHECK-NEXT: nop +; CHECK-NEXT: mr r3, r31 +; CHECK-NEXT: b L..BB0_13 +; CHECK-NEXT: L..BB0_11: # %sw.bb +; CHECK-NEXT: mr r3, r31 +; CHECK-NEXT: bl .foo1[PR] +; CHECK-NEXT: nop +; CHECK-NEXT: mr r3, r31 +; CHECK-NEXT: b L..BB0_13 +; CHECK-NEXT: L..BB0_12: # %sw.bb3 +; CHECK-NEXT: li r3, 4 +; CHECK-NEXT: bl .foo4[PR] +; CHECK-NEXT: nop +; CHECK-NEXT: li r3, 4 +; CHECK-NEXT: L..BB0_13: # %return +; CHECK-NEXT: lwz r31, 60(r1) # 4-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 64 +; CHECK-NEXT: lwz r0, 8(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + switch i32 %x, label %return [ + i32 7, label %sw.bb + i32 10, label %sw.bb + i32 8, label %sw.bb1 + i32 11, label %sw.bb1 + i32 9, label %sw.bb2 + i32 12, label %sw.bb2 + i32 4, label %sw.bb3 + ] + +sw.bb: ; preds = %entry, %entry + tail call void @foo1(i32 noundef signext %x) + br label %return + +sw.bb1: ; preds = %entry, %entry + tail call void @foo2(i32 noundef signext %x) + br label %return + +sw.bb2: ; preds = %entry, %entry + tail call void @foo3(i32 noundef signext %x) + br label %return + +sw.bb3: ; preds = %entry + tail call void @foo4(i32 noundef signext 4) + br label %return + +return: ; preds = %sw.bb, %sw.bb1, %sw.bb2, %sw.bb3, %entry + %retval.0 = phi i32 [ 0, %entry ], [ 4, %sw.bb3 ], [ %x, %sw.bb2 ], [ %x, %sw.bb1 ], [ %x, %sw.bb ] + ret i32 %retval.0 +} + +define i32 @goo(i32 noundef signext %x) { +; CHECK-LABEL: goo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stwu r1, -64(r1) +; CHECK-NEXT: stw r0, 72(r1) +; CHECK-NEXT: cmplwi r3, 12 +; CHECK-NEXT: stw r31, 60(r1) # 4-byte Folded Spill +; CHECK-NEXT: mr r31, r3 +; CHECK-NEXT: bgt cr0, L..BB1_7 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: li r3, 1 +; CHECK-NEXT: slw r3, r3, r31 +; 
CHECK-NEXT: andi. r4, r3, 5632 +; CHECK-NEXT: bne cr0, L..BB1_4 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: andi. r3, r3, 2304 +; CHECK-NEXT: beq cr0, L..BB1_5 +; CHECK-NEXT: # %bb.3: # %sw.bb1 +; CHECK-NEXT: mr r3, r31 +; CHECK-NEXT: bl .foo2[PR] +; CHECK-NEXT: nop +; CHECK-NEXT: b L..BB1_9 +; CHECK-NEXT: L..BB1_4: # %sw.bb2 +; CHECK-NEXT: mr r3, r31 +; CHECK-NEXT: bl .foo3[PR] +; CHECK-NEXT: nop +; CHECK-NEXT: b L..BB1_9 +; CHECK-NEXT: L..BB1_5: # %entry +; CHECK-NEXT: cmplwi r31, 7 +; CHECK-NEXT: bne cr0, L..BB1_7 +; CHECK-NEXT: # %bb.6: # %sw.bb +; CHECK-NEXT: li r3, 7 +; CHECK-NEXT: li r31, 7 +; CHECK-NEXT: bl .foo1[PR] +; CHECK-NEXT: nop +; CHECK-NEXT: b L..BB1_9 +; CHECK-NEXT: L..BB1_7: # %entry +; CHECK-NEXT: cmplwi r31, 4 +; CHECK-NEXT: li r31, 0 +; CHECK-NEXT: bne cr0, L..BB1_9 +; CHECK-NEXT: # %bb.8: # %sw.bb3 +; CHECK-NEXT: li r3, 4 +; CHECK-NEXT: li r31, 4 +; CHECK-NEXT: bl .foo4[PR] +; CHECK-NEXT: nop +; CHECK-NEXT: L..BB1_9: # %return +; CHECK-NEXT: mr r3, r31 +; CHECK-NEXT: lwz r31, 60(r1) # 4-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 64 +; CHECK-NEXT: lwz r0, 8(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + switch i32 %x, label %return [ + i32 7, label %sw.bb + i32 8, label %sw.bb1 + i32 11, label %sw.bb1 + i32 9, label %sw.bb2 + i32 10, label %sw.bb2 + i32 12, label %sw.bb2 + i32 4, label %sw.bb3 + ] + +sw.bb: ; preds = %entry + tail call void @foo1(i32 noundef signext 7) + br label %return + +sw.bb1: ; preds = %entry, %entry + tail call void @foo2(i32 noundef signext %x) + br label %return + +sw.bb2: ; preds = %entry, %entry, %entry + tail call void @foo3(i32 noundef signext %x) + br label %return + +sw.bb3: ; preds = %entry + tail call void @foo4(i32 noundef signext 4) + br label %return + +return: ; preds = %sw.bb, %sw.bb1, %sw.bb2, %sw.bb3, %entry + %retval.0 = phi i32 [ 0, %entry ], [ 4, %sw.bb3 ], [ %x, %sw.bb2 ], [ %x, %sw.bb1 ], [ 7, %sw.bb ] + ret i32 %retval.0 +} + +declare void @foo1(i32 noundef signext) + +declare void @foo2(i32 noundef signext) + +declare void @foo3(i32 noundef signext) + +declare void @foo4(i32 noundef signext) diff --git a/llvm/test/CodeGen/X86/bfloat-calling-conv.ll b/llvm/test/CodeGen/X86/bfloat-calling-conv.ll index ea4d32bae9ccb..d08749174f85c 100644 --- a/llvm/test/CodeGen/X86/bfloat-calling-conv.ll +++ b/llvm/test/CodeGen/X86/bfloat-calling-conv.ll @@ -660,8 +660,7 @@ define <3 x bfloat> @call_ret_v3bf16(ptr %ptr) #0 { ; SSE2-LABEL: call_ret_v3bf16: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rax -; SSE2-NEXT: movl 4(%rdi), %eax -; SSE2-NEXT: pinsrw $0, %eax, %xmm1 +; SSE2-NEXT: pinsrw $0, 4(%rdi), %xmm1 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: callq returns_v3bf16@PLT @@ -725,8 +724,7 @@ define <3 x bfloat> @call_ret_v3bf16(ptr %ptr) #0 { ; AVXNECONVERT-LABEL: call_ret_v3bf16: ; AVXNECONVERT: # %bb.0: ; AVXNECONVERT-NEXT: pushq %rax -; AVXNECONVERT-NEXT: movl 4(%rdi), %eax -; AVXNECONVERT-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; AVXNECONVERT-NEXT: vpinsrw $0, 4(%rdi), %xmm0, %xmm0 ; AVXNECONVERT-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; AVXNECONVERT-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero ; AVXNECONVERT-NEXT: callq returns_v3bf16@PLT diff --git a/llvm/test/CodeGen/X86/trunc-srl-load.ll b/llvm/test/CodeGen/X86/trunc-srl-load.ll index 4dae1433b2196..d9c21d3a3f570 100644 --- a/llvm/test/CodeGen/X86/trunc-srl-load.ll +++ b/llvm/test/CodeGen/X86/trunc-srl-load.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have 
been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86 -; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE -; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE -; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64 ; Tests showing for the analysis of non-constant shift amounts to improve load address math @@ -12,42 +12,20 @@ define i16 @extractSub64_16(ptr %word, i32 %idx) nounwind { ; X86-LABEL: extractSub64_16: ; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movb {{[0-9]+}}(%esp), %ch ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: movl 4(%eax), %esi -; X86-NEXT: movb %ch, %cl -; X86-NEXT: andb $16, %cl -; X86-NEXT: movl %esi, %eax -; X86-NEXT: shrl %cl, %eax -; X86-NEXT: shrdl %cl, %esi, %edx -; X86-NEXT: testb $32, %ch -; X86-NEXT: jne .LBB0_2 -; X86-NEXT: # %bb.1: -; X86-NEXT: movl %edx, %eax -; X86-NEXT: .LBB0_2: -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: popl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $48, %ecx +; X86-NEXT: shrl $3, %ecx +; X86-NEXT: movzwl (%eax,%ecx), %eax ; X86-NEXT: retl ; -; SSE-LABEL: extractSub64_16: -; SSE: # %bb.0: -; SSE-NEXT: movl %esi, %ecx -; SSE-NEXT: movq (%rdi), %rax -; SSE-NEXT: andb $48, %cl -; SSE-NEXT: # kill: def $cl killed $cl killed $ecx -; SSE-NEXT: shrq %cl, %rax -; SSE-NEXT: # kill: def $ax killed $ax killed $rax -; SSE-NEXT: retq -; -; AVX-LABEL: extractSub64_16: -; AVX: # %bb.0: -; AVX-NEXT: # kill: def $esi killed $esi def $rsi -; AVX-NEXT: andb $48, %sil -; AVX-NEXT: shrxq %rsi, (%rdi), %rax -; AVX-NEXT: # kill: def $ax killed $ax killed $rax -; AVX-NEXT: retq +; X64-LABEL: extractSub64_16: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: andl $48, %esi +; X64-NEXT: shrl $3, %esi +; X64-NEXT: movzwl (%rdi,%rsi), %eax +; X64-NEXT: retq %idx_bounds = and i32 %idx, 63 %idx_align = and i32 %idx_bounds, -16 %sh = zext nneg i32 %idx_align to i64 @@ -60,67 +38,20 @@ define i16 @extractSub64_16(ptr %word, i32 %idx) nounwind { define i16 @extractSub128_16(ptr %word, i32 %idx) nounwind { ; X86-LABEL: extractSub128_16: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $32, %esp -; X86-NEXT: movzbl 12(%ebp), %eax -; X86-NEXT: movl 8(%ebp), %ecx -; X86-NEXT: movl (%ecx), %edx -; X86-NEXT: movl 4(%ecx), %esi -; X86-NEXT: movl 8(%ecx), %edi -; X86-NEXT: movl 12(%ecx), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) -; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, (%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: andb $16, %cl -; X86-NEXT: shrb $3, %al -; 
X86-NEXT: andb $12, %al -; X86-NEXT: movzbl %al, %edx -; X86-NEXT: movl (%esp,%edx), %eax -; X86-NEXT: movl 4(%esp,%edx), %edx -; X86-NEXT: shrdl %cl, %edx, %eax -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: leal -8(%ebp), %esp -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $112, %ecx +; X86-NEXT: shrl $3, %ecx +; X86-NEXT: movzwl (%eax,%ecx), %eax ; X86-NEXT: retl ; -; SSE-LABEL: extractSub128_16: -; SSE: # %bb.0: -; SSE-NEXT: movq (%rdi), %rax -; SSE-NEXT: movq 8(%rdi), %rdx -; SSE-NEXT: movl %esi, %ecx -; SSE-NEXT: andb $48, %cl -; SSE-NEXT: movq %rdx, %rdi -; SSE-NEXT: shrq %cl, %rdi -; SSE-NEXT: shrdq %cl, %rdx, %rax -; SSE-NEXT: testb $64, %sil -; SSE-NEXT: cmovneq %rdi, %rax -; SSE-NEXT: # kill: def $ax killed $ax killed $rax -; SSE-NEXT: retq -; -; AVX-LABEL: extractSub128_16: -; AVX: # %bb.0: -; AVX-NEXT: movq (%rdi), %rdx -; AVX-NEXT: movq 8(%rdi), %rax -; AVX-NEXT: movl %esi, %ecx -; AVX-NEXT: andb $48, %cl -; AVX-NEXT: shrdq %cl, %rax, %rdx -; AVX-NEXT: shrxq %rcx, %rax, %rax -; AVX-NEXT: testb $64, %sil -; AVX-NEXT: cmoveq %rdx, %rax -; AVX-NEXT: # kill: def $ax killed $ax killed $rax -; AVX-NEXT: retq +; X64-LABEL: extractSub128_16: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: andl $112, %esi +; X64-NEXT: shrl $3, %esi +; X64-NEXT: movzwl (%rdi,%rsi), %eax +; X64-NEXT: retq %idx_bounds = and i32 %idx, 127 %idx_align = and i32 %idx_bounds, -16 %sh = zext nneg i32 %idx_align to i128 @@ -133,62 +64,20 @@ define i16 @extractSub128_16(ptr %word, i32 %idx) nounwind { define i32 @extractSub128_32(ptr %word, i32 %idx) nounwind { ; X86-LABEL: extractSub128_32: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $32, %esp -; X86-NEXT: movzbl 12(%ebp), %eax -; X86-NEXT: movl 8(%ebp), %ecx -; X86-NEXT: movl (%ecx), %edx -; X86-NEXT: movl 4(%ecx), %esi -; X86-NEXT: movl 8(%ecx), %edi -; X86-NEXT: movl 12(%ecx), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) -; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, (%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: andb $96, %al -; X86-NEXT: shrb $3, %al -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: movl (%esp,%eax), %eax -; X86-NEXT: leal -8(%ebp), %esp -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $96, %ecx +; X86-NEXT: shrl $3, %ecx +; X86-NEXT: movl (%eax,%ecx), %eax ; X86-NEXT: retl ; -; SSE-LABEL: extractSub128_32: -; SSE: # %bb.0: -; SSE-NEXT: movq (%rdi), %rax -; SSE-NEXT: movq 8(%rdi), %rdx -; SSE-NEXT: movl %esi, %ecx -; SSE-NEXT: andb $32, %cl -; SSE-NEXT: movq %rdx, %rdi -; SSE-NEXT: shrq %cl, %rdi -; SSE-NEXT: shrdq %cl, %rdx, %rax -; SSE-NEXT: testb $64, %sil -; SSE-NEXT: cmovneq %rdi, %rax -; SSE-NEXT: # kill: def $eax killed $eax killed $rax -; SSE-NEXT: retq -; -; AVX-LABEL: extractSub128_32: -; AVX: # %bb.0: -; AVX-NEXT: movq (%rdi), %rdx -; AVX-NEXT: movq 8(%rdi), %rax -; AVX-NEXT: movl %esi, %ecx -; AVX-NEXT: andb $32, %cl -; AVX-NEXT: shrdq %cl, %rax, %rdx -; AVX-NEXT: shrxq %rcx, %rax, %rax -; AVX-NEXT: testb $64, %sil -; AVX-NEXT: cmoveq %rdx, 
%rax -; AVX-NEXT: # kill: def $eax killed $eax killed $rax -; AVX-NEXT: retq +; X64-LABEL: extractSub128_32: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: andl $96, %esi +; X64-NEXT: shrl $3, %esi +; X64-NEXT: movl (%rdi,%rsi), %eax +; X64-NEXT: retq %idx_bounds = and i32 %idx, 127 %idx_align = and i32 %idx_bounds, -32 %sh = zext nneg i32 %idx_align to i128 @@ -201,46 +90,20 @@ define i32 @extractSub128_32(ptr %word, i32 %idx) nounwind { define i64 @extractSub128_64(ptr %word, i32 %idx) nounwind { ; X86-LABEL: extractSub128_64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $32, %esp -; X86-NEXT: movzbl 12(%ebp), %eax -; X86-NEXT: movl 8(%ebp), %ecx -; X86-NEXT: movl (%ecx), %edx -; X86-NEXT: movl 4(%ecx), %esi -; X86-NEXT: movl 8(%ecx), %edi -; X86-NEXT: movl 12(%ecx), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) -; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, (%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: andb $64, %al -; X86-NEXT: shrb $3, %al -; X86-NEXT: movzbl %al, %ecx -; X86-NEXT: movl (%esp,%ecx), %eax -; X86-NEXT: movl 4(%esp,%ecx), %edx -; X86-NEXT: leal -8(%ebp), %esp -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: andl $64, %edx +; X86-NEXT: shrl $3, %edx +; X86-NEXT: movl (%ecx,%edx), %eax +; X86-NEXT: movl 4(%ecx,%edx), %edx ; X86-NEXT: retl ; ; X64-LABEL: extractSub128_64: ; X64: # %bb.0: -; X64-NEXT: testb $64, %sil -; X64-NEXT: je .LBB3_1 -; X64-NEXT: # %bb.2: -; X64-NEXT: movq 8(%rdi), %rax -; X64-NEXT: retq -; X64-NEXT: .LBB3_1: -; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: andl $64, %esi +; X64-NEXT: shrl $3, %esi +; X64-NEXT: movq (%rdi,%rsi), %rax ; X64-NEXT: retq %idx_bounds = and i32 %idx, 127 %idx_align = and i32 %idx_bounds, -64 @@ -254,185 +117,20 @@ define i64 @extractSub128_64(ptr %word, i32 %idx) nounwind { define i8 @extractSub512_8(ptr %word, i32 %idx) nounwind { ; X86-LABEL: extractSub512_8: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $192, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl (%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 4(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 8(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 12(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 16(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 20(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 24(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 28(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 32(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 36(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill -; X86-NEXT: movl 40(%eax), %ebx -; X86-NEXT: movl 44(%eax), %edi -; X86-NEXT: movl 48(%eax), %esi -; X86-NEXT: movl 52(%eax), %edx -; X86-NEXT: movl 56(%eax), %ecx -; X86-NEXT: movl 60(%eax), %eax -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) -; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 12(%ebp), %edx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: andl $24, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: shrl $3, %edx -; X86-NEXT: andl $60, %edx -; X86-NEXT: movl 48(%esp,%edx), %eax -; X86-NEXT: movl 52(%esp,%edx), %edx -; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shrdl %cl, %edx, %eax -; X86-NEXT: # kill: def $al killed $al killed $eax -; X86-NEXT: leal -12(%ebp), %esp -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: shrl $3, %ecx +; X86-NEXT: andl $63, %ecx +; X86-NEXT: movzbl (%eax,%ecx), %eax ; X86-NEXT: retl ; -; SSE-LABEL: extractSub512_8: -; SSE: # %bb.0: -; SSE-NEXT: pushq %rax -; SSE-NEXT: # kill: def $esi killed $esi def $rsi -; SSE-NEXT: movups (%rdi), %xmm0 -; SSE-NEXT: movups 16(%rdi), %xmm1 -; SSE-NEXT: movups 32(%rdi), %xmm2 -; SSE-NEXT: movups 48(%rdi), %xmm3 -; SSE-NEXT: xorps %xmm4, %xmm4 -; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movl %esi, %ecx -; SSE-NEXT: andl $56, %ecx -; SSE-NEXT: shrl $3, %esi 
-; SSE-NEXT: andl $56, %esi -; SSE-NEXT: movq -128(%rsp,%rsi), %rdx -; SSE-NEXT: shrq %cl, %rdx -; SSE-NEXT: movl -120(%rsp,%rsi), %eax -; SSE-NEXT: addl %eax, %eax -; SSE-NEXT: notl %ecx -; SSE-NEXT: # kill: def $cl killed $cl killed $ecx -; SSE-NEXT: shlq %cl, %rax -; SSE-NEXT: orl %edx, %eax -; SSE-NEXT: # kill: def $al killed $al killed $rax -; SSE-NEXT: popq %rcx -; SSE-NEXT: retq -; -; AVX2-LABEL: extractSub512_8: -; AVX2: # %bb.0: -; AVX2-NEXT: pushq %rax -; AVX2-NEXT: # kill: def $esi killed $esi def $rsi -; AVX2-NEXT: vmovups (%rdi), %ymm0 -; AVX2-NEXT: vmovups 32(%rdi), %ymm1 -; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: movl %esi, %ecx -; AVX2-NEXT: andl $56, %ecx -; AVX2-NEXT: shrl $3, %esi -; AVX2-NEXT: andl $56, %esi -; AVX2-NEXT: shrxq %rcx, -128(%rsp,%rsi), %rax -; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx -; AVX2-NEXT: notl %ecx -; AVX2-NEXT: movl -120(%rsp,%rsi), %edx -; AVX2-NEXT: addl %edx, %edx -; AVX2-NEXT: shlxq %rcx, %rdx, %rcx -; AVX2-NEXT: orl %ecx, %eax -; AVX2-NEXT: # kill: def $al killed $al killed $rax -; AVX2-NEXT: popq %rcx -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq -; -; AVX512-LABEL: extractSub512_8: -; AVX512: # %bb.0: -; AVX512-NEXT: pushq %rax -; AVX512-NEXT: vmovups (%rdi), %ymm0 -; AVX512-NEXT: vmovups 32(%rdi), %ymm1 -; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: # kill: def $esi killed $esi def $rsi -; AVX512-NEXT: movl %esi, %ecx -; AVX512-NEXT: andl $56, %ecx -; AVX512-NEXT: shrl $3, %esi -; AVX512-NEXT: andl $56, %esi -; AVX512-NEXT: shrxq %rcx, -128(%rsp,%rsi), %rax -; AVX512-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx -; AVX512-NEXT: notl %ecx -; AVX512-NEXT: movl -120(%rsp,%rsi), %edx -; AVX512-NEXT: addl %edx, %edx -; AVX512-NEXT: shlxq %rcx, %rdx, %rcx -; AVX512-NEXT: orl %ecx, %eax -; AVX512-NEXT: # kill: def $al killed $al killed $rax -; AVX512-NEXT: popq %rcx -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; X64-LABEL: extractSub512_8: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: shrl $3, %esi +; X64-NEXT: andl $63, %esi +; X64-NEXT: movzbl (%rdi,%rsi), %eax +; X64-NEXT: retq %idx_bounds = and i32 %idx, 511 %idx_align = and i32 %idx_bounds, -8 %ld = load i512, ptr %word, align 8 @@ -445,152 +143,21 @@ define i8 @extractSub512_8(ptr %word, i32 %idx) nounwind { define i64 @extractSub512_64(ptr %word, i32 %idx) nounwind { ; X86-LABEL: extractSub512_64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $192, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl (%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 4(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 8(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 12(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 16(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 
20(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 24(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 28(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 32(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 36(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 40(%eax), %ebx -; X86-NEXT: movl 44(%eax), %edi -; X86-NEXT: movl 48(%eax), %esi -; X86-NEXT: movl 52(%eax), %edx -; X86-NEXT: movl 56(%eax), %ecx -; X86-NEXT: movl 60(%eax), %eax -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) -; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: shrl $3, %ecx -; X86-NEXT: andl $56, %ecx -; X86-NEXT: movl 48(%esp,%ecx), %eax -; X86-NEXT: movl 52(%esp,%ecx), %edx -; X86-NEXT: leal -12(%ebp), %esp -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: shrl $3, %edx +; X86-NEXT: andl $56, %edx +; X86-NEXT: movl (%ecx,%edx), %eax +; X86-NEXT: movl 4(%ecx,%edx), %edx ; X86-NEXT: retl ; -; SSE-LABEL: extractSub512_64: -; SSE: # %bb.0: -; SSE-NEXT: pushq %rax -; SSE-NEXT: # kill: def $esi killed $esi def $rsi -; SSE-NEXT: movups (%rdi), %xmm0 -; SSE-NEXT: movups 16(%rdi), %xmm1 -; SSE-NEXT: movups 32(%rdi), %xmm2 -; SSE-NEXT: movups 48(%rdi), %xmm3 -; SSE-NEXT: xorps %xmm4, %xmm4 -; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) -; 
SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT: shrl $3, %esi -; SSE-NEXT: andl $56, %esi -; SSE-NEXT: movq -128(%rsp,%rsi), %rax -; SSE-NEXT: popq %rcx -; SSE-NEXT: retq -; -; AVX2-LABEL: extractSub512_64: -; AVX2: # %bb.0: -; AVX2-NEXT: pushq %rax -; AVX2-NEXT: vmovups (%rdi), %ymm0 -; AVX2-NEXT: vmovups 32(%rdi), %ymm1 -; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: # kill: def $esi killed $esi def $rsi -; AVX2-NEXT: shrl $3, %esi -; AVX2-NEXT: andl $56, %esi -; AVX2-NEXT: movq -128(%rsp,%rsi), %rax -; AVX2-NEXT: popq %rcx -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq -; -; AVX512-LABEL: extractSub512_64: -; AVX512: # %bb.0: -; AVX512-NEXT: pushq %rax -; AVX512-NEXT: vmovups (%rdi), %ymm0 -; AVX512-NEXT: vmovups 32(%rdi), %ymm1 -; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: # kill: def $esi killed $esi def $rsi -; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: shrl $3, %esi -; AVX512-NEXT: andl $56, %esi -; AVX512-NEXT: movq -128(%rsp,%rsi), %rax -; AVX512-NEXT: popq %rcx -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; X64-LABEL: extractSub512_64: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: shrl $3, %esi +; X64-NEXT: andl $56, %esi +; X64-NEXT: movq (%rdi,%rsi), %rax +; X64-NEXT: retq %idx_bounds = and i32 %idx, 511 %idx_align = and i32 %idx_bounds, -64 %sh = zext nneg i32 %idx_align to i512 @@ -603,143 +170,35 @@ define i64 @extractSub512_64(ptr %word, i32 %idx) nounwind { define i128 @extractSub512_128(ptr %word, i32 %idx) nounwind { ; X86-LABEL: extractSub512_128: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $192, %esp -; X86-NEXT: movl 12(%ebp), %eax -; X86-NEXT: movl (%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 4(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 8(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 12(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 16(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 20(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 24(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 28(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 32(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 36(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 40(%eax), %ebx -; X86-NEXT: movl 44(%eax), %edi -; X86-NEXT: movl 48(%eax), %esi -; X86-NEXT: movl 52(%eax), %edx -; X86-NEXT: movl 56(%eax), %ecx -; X86-NEXT: movl 60(%eax), %eax -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl %ecx, 
{{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) -; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: shrl $3, %edi -; X86-NEXT: andl $48, %edi -; X86-NEXT: movl 48(%esp,%edi), %ecx -; X86-NEXT: movl 52(%esp,%edi), %edx -; X86-NEXT: movl 56(%esp,%edi), %esi -; X86-NEXT: movl 60(%esp,%edi), %edi -; X86-NEXT: movl %edi, 12(%eax) -; X86-NEXT: movl %esi, 8(%eax) -; X86-NEXT: movl %edx, 4(%eax) -; X86-NEXT: movl %ecx, (%eax) -; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: shrl $3, %edx +; X86-NEXT: andl $48, %edx +; X86-NEXT: movl (%ecx,%edx), %esi +; X86-NEXT: movl 4(%ecx,%edx), %edi +; X86-NEXT: movl 8(%ecx,%edx), %ebx +; X86-NEXT: movl 12(%ecx,%edx), %ecx +; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: movl %ebx, 8(%eax) +; X86-NEXT: movl %edi, 4(%eax) +; X86-NEXT: movl %esi, (%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 ; -; SSE-LABEL: extractSub512_128: -; SSE: # %bb.0: -; SSE-NEXT: pushq %rax -; SSE-NEXT: # kill: def $esi killed $esi def $rsi -; SSE-NEXT: movups (%rdi), %xmm0 -; SSE-NEXT: movups 16(%rdi), %xmm1 -; SSE-NEXT: movups 32(%rdi), %xmm2 -; SSE-NEXT: movups 48(%rdi), %xmm3 -; SSE-NEXT: xorps %xmm4, %xmm4 -; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT: shrl $3, %esi -; 
SSE-NEXT: andl $48, %esi -; SSE-NEXT: movq -128(%rsp,%rsi), %rax -; SSE-NEXT: movq -120(%rsp,%rsi), %rdx -; SSE-NEXT: popq %rcx -; SSE-NEXT: retq -; -; AVX-LABEL: extractSub512_128: -; AVX: # %bb.0: -; AVX-NEXT: pushq %rax -; AVX-NEXT: # kill: def $esi killed $esi def $rsi -; AVX-NEXT: vmovups (%rdi), %ymm0 -; AVX-NEXT: vmovups 32(%rdi), %ymm1 -; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) -; AVX-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX-NEXT: shrl $3, %esi -; AVX-NEXT: andl $48, %esi -; AVX-NEXT: movq -128(%rsp,%rsi), %rax -; AVX-NEXT: movq -120(%rsp,%rsi), %rdx -; AVX-NEXT: popq %rcx -; AVX-NEXT: vzeroupper -; AVX-NEXT: retq +; X64-LABEL: extractSub512_128: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: shrl $3, %esi +; X64-NEXT: andl $48, %esi +; X64-NEXT: movq (%rdi,%rsi), %rax +; X64-NEXT: movq 8(%rdi,%rsi), %rdx +; X64-NEXT: retq %idx_bounds = and i32 %idx, 511 %idx_align = and i32 %idx_bounds, -128 %sh = zext nneg i32 %idx_align to i512 @@ -752,916 +211,21 @@ define i128 @extractSub512_128(ptr %word, i32 %idx) nounwind { define i64 @extractSub4096_64(ptr %word, i32 %idx) nounwind { ; X86-LABEL: extractSub4096_64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $1536, %esp # imm = 0x600 -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 4(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 8(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 12(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 16(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 20(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 24(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 28(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 32(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 36(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 40(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 44(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 48(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 52(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 56(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 60(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 64(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 68(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 72(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 76(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 80(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 84(%eax), %ecx -; X86-NEXT: movl %ecx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 88(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 92(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 96(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 100(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 104(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 108(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 112(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 116(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 120(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 124(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 128(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 132(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 136(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 140(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 144(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 148(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 152(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 156(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 160(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 164(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 168(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 172(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 176(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 180(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 184(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 188(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 192(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 196(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 200(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 204(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 208(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 212(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 216(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 220(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 224(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 228(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Spill -; X86-NEXT: movl 232(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 236(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 240(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 244(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 248(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 252(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 256(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 260(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 264(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 268(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 272(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 276(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 280(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 284(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 288(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 292(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 296(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 300(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 304(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 308(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 312(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 316(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 320(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 324(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 328(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 332(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 336(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 340(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 344(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 348(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 352(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 356(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 360(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 364(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 368(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 372(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: 
movl 376(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 380(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl (%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 384(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 388(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 392(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 396(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 400(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 404(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 408(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 412(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 416(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 420(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 424(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 428(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 432(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 436(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 440(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 444(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 448(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 452(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 456(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 460(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 464(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 468(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 472(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 476(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 480(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 484(%eax), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 488(%eax), %ebx -; X86-NEXT: movl 492(%eax), %edi -; X86-NEXT: movl 496(%eax), %esi -; X86-NEXT: movl 500(%eax), %edx -; X86-NEXT: movl 504(%eax), %ecx -; X86-NEXT: movl 508(%eax), %eax -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) -; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: 
movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; 
X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl $4032, %ecx # imm = 0xFC0 -; X86-NEXT: andl 12(%ebp), %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) 
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: shrl $3, %ecx -; X86-NEXT: movl 
496(%esp,%ecx), %eax -; X86-NEXT: movl 500(%esp,%ecx), %edx -; X86-NEXT: leal -12(%ebp), %esp -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $4032, %edx # imm = 0xFC0 +; X86-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NEXT: shrl $3, %edx +; X86-NEXT: movl (%ecx,%edx), %eax +; X86-NEXT: movl 4(%ecx,%edx), %edx ; X86-NEXT: retl ; -; SSE-LABEL: extractSub4096_64: -; SSE: # %bb.0: -; SSE-NEXT: subq $1176, %rsp # imm = 0x498 -; SSE-NEXT: # kill: def $esi killed $esi def $rsi -; SSE-NEXT: movups (%rdi), %xmm0 -; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT: movups 16(%rdi), %xmm0 -; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT: movups 32(%rdi), %xmm0 -; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT: movups 48(%rdi), %xmm0 -; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT: movups 64(%rdi), %xmm0 -; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT: movups 80(%rdi), %xmm0 -; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT: movups 96(%rdi), %xmm0 -; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT: movups 112(%rdi), %xmm0 -; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT: movups 128(%rdi), %xmm0 -; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; SSE-NEXT: movups 144(%rdi), %xmm0 -; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT: movups 160(%rdi), %xmm0 -; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT: movups 176(%rdi), %xmm0 -; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT: movups 192(%rdi), %xmm0 -; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT: movups 208(%rdi), %xmm0 -; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT: movups 224(%rdi), %xmm0 -; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT: movups 240(%rdi), %xmm0 -; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT: movups 256(%rdi), %xmm0 -; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT: movups 272(%rdi), %xmm15 -; SSE-NEXT: movups 288(%rdi), %xmm14 -; SSE-NEXT: movups 304(%rdi), %xmm13 -; SSE-NEXT: movups 320(%rdi), %xmm12 -; SSE-NEXT: movups 336(%rdi), %xmm11 -; SSE-NEXT: movups 352(%rdi), %xmm10 -; SSE-NEXT: movups 368(%rdi), %xmm9 -; SSE-NEXT: movups 384(%rdi), %xmm8 -; SSE-NEXT: movups 400(%rdi), %xmm7 -; SSE-NEXT: movups 416(%rdi), %xmm6 -; SSE-NEXT: movups 432(%rdi), %xmm5 -; SSE-NEXT: movups 448(%rdi), %xmm4 -; SSE-NEXT: movups 464(%rdi), %xmm3 -; SSE-NEXT: movups 480(%rdi), %xmm2 -; SSE-NEXT: movups 496(%rdi), %xmm1 -; SSE-NEXT: xorps %xmm0, %xmm0 -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; 
SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm2, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm3, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm4, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm5, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm6, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm7, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm8, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm9, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm10, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm11, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm12, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm13, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm14, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm15, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE-NEXT: andl $4032, %esi # imm = 0xFC0 -; SSE-NEXT: shrl $3, %esi -; SSE-NEXT: movq 144(%rsp,%rsi), %rax -; SSE-NEXT: addq $1176, %rsp # imm = 0x498 -; SSE-NEXT: retq -; -; AVX2-LABEL: 
extractSub4096_64: -; AVX2: # %bb.0: -; AVX2-NEXT: subq $936, %rsp # imm = 0x3A8 -; AVX2-NEXT: vmovups (%rdi), %ymm0 -; AVX2-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX2-NEXT: vmovups 32(%rdi), %ymm1 -; AVX2-NEXT: vmovups 64(%rdi), %ymm2 -; AVX2-NEXT: vmovups 96(%rdi), %ymm3 -; AVX2-NEXT: vmovups 128(%rdi), %ymm4 -; AVX2-NEXT: vmovups 160(%rdi), %ymm5 -; AVX2-NEXT: vmovups 192(%rdi), %ymm6 -; AVX2-NEXT: vmovups 224(%rdi), %ymm7 -; AVX2-NEXT: vmovups 256(%rdi), %ymm8 -; AVX2-NEXT: vmovups 288(%rdi), %ymm9 -; AVX2-NEXT: vmovups 320(%rdi), %ymm10 -; AVX2-NEXT: vmovups 352(%rdi), %ymm11 -; AVX2-NEXT: vmovups 384(%rdi), %ymm12 -; AVX2-NEXT: vmovups 416(%rdi), %ymm13 -; AVX2-NEXT: vmovups 448(%rdi), %ymm14 -; AVX2-NEXT: vmovups 480(%rdi), %ymm15 -; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm15, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm14, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm13, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm12, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm11, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm10, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm9, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm8, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm6, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm5, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm4, {{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm3, (%rsp) -; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload -; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: # kill: def $esi killed $esi def $rsi -; AVX2-NEXT: andl $4032, %esi # imm = 0xFC0 -; AVX2-NEXT: shrl $3, %esi -; AVX2-NEXT: movq -96(%rsp,%rsi), %rax -; AVX2-NEXT: addq $936, %rsp # imm = 0x3A8 -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq -; -; AVX512-LABEL: extractSub4096_64: -; AVX512: # %bb.0: -; AVX512-NEXT: subq $904, %rsp # imm = 0x388 -; AVX512-NEXT: # kill: def $esi killed $esi def $rsi -; AVX512-NEXT: vmovups (%rdi), %ymm0 -; AVX512-NEXT: vmovups 32(%rdi), %ymm1 -; AVX512-NEXT: vmovups 64(%rdi), %ymm2 -; AVX512-NEXT: vmovups 96(%rdi), %ymm3 -; AVX512-NEXT: vmovups 128(%rdi), %ymm4 -; AVX512-NEXT: vmovups 160(%rdi), %ymm5 -; AVX512-NEXT: vmovups 192(%rdi), %ymm6 -; AVX512-NEXT: vmovups 224(%rdi), %ymm7 -; AVX512-NEXT: vmovups 256(%rdi), %ymm8 -; AVX512-NEXT: vmovups 288(%rdi), %ymm9 -; AVX512-NEXT: vmovups 320(%rdi), %ymm10 -; AVX512-NEXT: vmovups 352(%rdi), %ymm11 -; AVX512-NEXT: vmovups 384(%rdi), %ymm12 -; AVX512-NEXT: vmovups 416(%rdi), %ymm13 -; AVX512-NEXT: andl $4032, %esi # imm = 0xFC0 -; AVX512-NEXT: vmovups 448(%rdi), %ymm14 -; AVX512-NEXT: vmovups 480(%rdi), %ymm15 -; AVX512-NEXT: vxorps %xmm16, %xmm16, %xmm16 -; AVX512-NEXT: vmovups %ymm16, 
{{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm15, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm14, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm13, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm12, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm11, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm10, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm9, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm8, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm6, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm5, {{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm4, (%rsp) -; AVX512-NEXT: vmovups %ymm3, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: shrl $3, %esi -; AVX512-NEXT: movq -128(%rsp,%rsi), %rax -; AVX512-NEXT: addq $904, %rsp # imm = 0x388 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; X64-LABEL: extractSub4096_64: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: andl $4032, %esi # imm = 0xFC0 +; X64-NEXT: shrl $3, %esi +; X64-NEXT: movq (%rdi,%rsi), %rax +; X64-NEXT: retq %idx_bounds = and i32 %idx, 4095 %idx_align = and i32 %idx_bounds, -64 %sh = zext nneg i32 %idx_align to i4096 diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll index 81c4d5d71084c..c3054a365c466 100644 --- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll +++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll @@ -962,39 +962,22 @@ define void @load_8byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6 } define void @load_1byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-NO-BMI2: # %bb.0: -; X64-NO-BMI2-NEXT: movups (%rdi), %xmm0 -; X64-NO-BMI2-NEXT: xorps %xmm1, %xmm1 -; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx -; X64-NO-BMI2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) -; X64-NO-BMI2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) -; X64-NO-BMI2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) -; X64-NO-BMI2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; X64-NO-BMI2-NEXT: movl %ecx, %eax -; X64-NO-BMI2-NEXT: shrb $6, %al -; X64-NO-BMI2-NEXT: movzbl %al, %eax -; X64-NO-BMI2-NEXT: movq -72(%rsp,%rax,8), %rax -; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NO-BMI2-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NEXT: movb %al, (%rdx) -; X64-NO-BMI2-NEXT: retq -; -; X64-BMI2-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movups (%rdi), %xmm0 -; X64-BMI2-NEXT: xorps %xmm1, %xmm1 -; X64-BMI2-NEXT: 
shll $3, %esi -; X64-BMI2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) -; X64-BMI2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) -; X64-BMI2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) -; X64-BMI2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; X64-BMI2-NEXT: movl %esi, %eax -; X64-BMI2-NEXT: shrb $6, %al -; X64-BMI2-NEXT: movzbl %al, %eax -; X64-BMI2-NEXT: shrxq %rsi, -72(%rsp,%rax,8), %rax -; X64-BMI2-NEXT: movb %al, (%rdx) -; X64-BMI2-NEXT: retq +; X64-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movups (%rdi), %xmm0 +; X64-NEXT: xorps %xmm1, %xmm1 +; X64-NEXT: leal (,%rsi,8), %eax +; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrb $6, %al +; X64-NEXT: movzbl %al, %eax +; X64-NEXT: leaq -72(%rsp,%rax,8), %rax +; X64-NEXT: andl $7, %esi +; X64-NEXT: movzbl (%rsi,%rax), %eax +; X64-NEXT: movb %al, (%rdx) +; X64-NEXT: retq ; ; X86-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: ; X86-NO-BMI2-NO-SHLD: # %bb.0: @@ -3417,7 +3400,6 @@ define void @load_32byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; ALL: {{.*}} -; X64: {{.*}} ; X64-NO-SHLD: {{.*}} ; X86: {{.*}} ; X86-HAVE-BMI2-HAVE-SHLD: {{.*}} diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll index 8d36eef952a2b..84c2cc6d5ec31 100644 --- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll +++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll @@ -1220,41 +1220,23 @@ define void @load_8byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) ; no @load_16byte_chunk_of_16byte_alloca define void @load_1byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-LABEL: load_1byte_chunk_of_32byte_alloca: -; X64-NO-BMI2: # %bb.0: -; X64-NO-BMI2-NEXT: movups (%rdi), %xmm0 -; X64-NO-BMI2-NEXT: movups 16(%rdi), %xmm1 -; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx -; X64-NO-BMI2-NEXT: xorps %xmm2, %xmm2 -; X64-NO-BMI2-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) -; X64-NO-BMI2-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) -; X64-NO-BMI2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) -; X64-NO-BMI2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; X64-NO-BMI2-NEXT: movl %ecx, %eax -; X64-NO-BMI2-NEXT: shrb $6, %al -; X64-NO-BMI2-NEXT: movzbl %al, %eax -; X64-NO-BMI2-NEXT: movq -72(%rsp,%rax,8), %rax -; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NO-BMI2-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NEXT: movb %al, (%rdx) -; X64-NO-BMI2-NEXT: retq -; -; X64-BMI2-LABEL: load_1byte_chunk_of_32byte_alloca: -; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movups (%rdi), %xmm0 -; X64-BMI2-NEXT: movups 16(%rdi), %xmm1 -; X64-BMI2-NEXT: shll $3, %esi -; X64-BMI2-NEXT: xorps %xmm2, %xmm2 -; X64-BMI2-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) -; X64-BMI2-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) -; X64-BMI2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) -; X64-BMI2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; X64-BMI2-NEXT: movl %esi, %eax -; X64-BMI2-NEXT: shrb $6, %al -; X64-BMI2-NEXT: movzbl %al, %eax -; X64-BMI2-NEXT: shrxq %rsi, -72(%rsp,%rax,8), %rax -; X64-BMI2-NEXT: movb %al, (%rdx) -; X64-BMI2-NEXT: retq +; X64-LABEL: load_1byte_chunk_of_32byte_alloca: +; X64: # %bb.0: +; X64-NEXT: movups (%rdi), %xmm0 +; X64-NEXT: movups 16(%rdi), %xmm1 +; X64-NEXT: leal (,%rsi,8), %eax +; X64-NEXT: xorps %xmm2, 
%xmm2
+; X64-NEXT:    movaps %xmm2, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movaps %xmm2, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    shrb $6, %al
+; X64-NEXT:    movzbl %al, %eax
+; X64-NEXT:    leaq -72(%rsp,%rax,8), %rax
+; X64-NEXT:    andl $7, %esi
+; X64-NEXT:    movzbl (%rsi,%rax), %eax
+; X64-NEXT:    movb %al, (%rdx)
+; X64-NEXT:    retq
 ;
 ; X86-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca:
 ; X86-NO-BMI2-NO-SHLD:       # %bb.0:
@@ -2156,7 +2138,6 @@ define void @load_16byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst
 ; no @load_32byte_chunk_of_32byte_alloca
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; ALL: {{.*}}
-; X64: {{.*}}
 ; X64-NO-SHLD: {{.*}}
 ; X86: {{.*}}
 ; X86-NO-SHLD: {{.*}}
diff --git a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
index 671a16d28f11a..adf3aa12623b9 100644
--- a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
+++ b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
@@ -261,3 +261,51 @@ define i32 @ptrtoaddr_of_ptrmask_addrsize(ptr addrspace(1) %p, i32 %mask) {
   %addr = ptrtoaddr ptr addrspace(1) %masked to i32
   ret i32 %addr
 }
+
+define i64 @ptrtoaddr_of_gep_of_inttoptr(i64 %int, i64 %offset) {
+; CHECK-LABEL: define i64 @ptrtoaddr_of_gep_of_inttoptr(
+; CHECK-SAME: i64 [[INT:%.*]], i64 [[OFFSET:%.*]]) {
+; CHECK-NEXT:    [[ADDR:%.*]] = add i64 [[INT]], [[OFFSET]]
+; CHECK-NEXT:    ret i64 [[ADDR]]
+;
+  %ptr = inttoptr i64 %int to ptr
+  %gep = getelementptr i8, ptr %ptr, i64 %offset
+  %addr = ptrtoaddr ptr %gep to i64
+  ret i64 %addr
+}
+
+; FIXME: This could be supported by truncating %int before performing the
+; arithmetic.
+define i32 @ptrtoaddr_of_gep_of_inttoptr_addrsize(i64 %int, i32 %offset) {
+; CHECK-LABEL: define i32 @ptrtoaddr_of_gep_of_inttoptr_addrsize(
+; CHECK-SAME: i64 [[INT:%.*]], i32 [[OFFSET:%.*]]) {
+; CHECK-NEXT:    [[PTR:%.*]] = inttoptr i64 [[INT]] to ptr addrspace(1)
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr addrspace(1) [[PTR]], i32 [[OFFSET]]
+; CHECK-NEXT:    [[ADDR:%.*]] = ptrtoaddr ptr addrspace(1) [[GEP]] to i32
+; CHECK-NEXT:    ret i32 [[ADDR]]
+;
+  %ptr = inttoptr i64 %int to ptr addrspace(1)
+  %gep = getelementptr i8, ptr addrspace(1) %ptr, i32 %offset
+  %addr = ptrtoaddr ptr addrspace(1) %gep to i32
+  ret i32 %addr
+}
+
+define i64 @ptrtoaddr_of_gep_of_null(i64 %offset) {
+; CHECK-LABEL: define i64 @ptrtoaddr_of_gep_of_null(
+; CHECK-SAME: i64 [[OFFSET:%.*]]) {
+; CHECK-NEXT:    ret i64 [[OFFSET]]
+;
+  %gep = getelementptr i8, ptr null, i64 %offset
+  %addr = ptrtoaddr ptr %gep to i64
+  ret i64 %addr
+}
+
+define i32 @ptrtoaddr_of_gep_of_null_addrsize(i32 %offset) {
+; CHECK-LABEL: define i32 @ptrtoaddr_of_gep_of_null_addrsize(
+; CHECK-SAME: i32 [[OFFSET:%.*]]) {
+; CHECK-NEXT:    ret i32 [[OFFSET]]
+;
+  %gep = getelementptr i8, ptr addrspace(1) null, i32 %offset
+  %addr = ptrtoaddr ptr addrspace(1) %gep to i32
+  ret i32 %addr
+}
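
The FIXME above records why the addrsize case is not folded yet. The suggested fix would be sound because every step of the chain works modulo the 32-bit address width of addrspace(1); sketched as arithmetic, assuming the wrap-around semantics of ptrtoaddr and GEP offset math that the FIXME itself implies:

\[
\operatorname{ptrtoaddr}\bigl(\operatorname{gep}(\operatorname{inttoptr}(x),\, o)\bigr)
  \;\equiv\; x + o
  \;\equiv\; \operatorname{trunc}_{64 \to 32}(x) + o
  \pmod{2^{32}},
\]

which is exactly "truncating %int before performing the arithmetic".
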
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll
new file mode 100644
index 0000000000000..a804225a380c8
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -S | FileCheck %s
+
+; Test whether UTC formats the switch-cases correctly, which requires TWO extra spaces.
+
+define i8 @testi8(i8 %x) {
+  switch i8 %x, label %default [
+    i8 0, label %case1
+    i8 1, label %case2
+    i8 2, label %case3
+    i8 3, label %case3
+  ]
+default:
+  ret i8 0
+case1:
+  ret i8 1
+case2:
+  ret i8 2
+case3:
+  ret i8 3
+}
+
+define i32 @testi32(i32 %x) {
+  switch i32 %x, label %default [
+    i32 0, label %case1
+    i32 1, label %case2
+    i32 2, label %case3
+    i32 3, label %case3
+  ]
+default:
+  ret i32 0
+case1:
+  ret i32 1
+case2:
+  ret i32 2
+case3:
+  ret i32 3
+}
+
+define i128 @testi128(i128 %x) {
+  switch i128 %x, label %default [
+    i128 0, label %case1
+    i128 1, label %case2
+    i128 2, label %case3
+    i128 3, label %case3
+  ]
+default:
+  ret i128 0
+case1:
+  ret i128 1
+case2:
+  ret i128 2
+case3:
+  ret i128 3
+}
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll.expected
new file mode 100644
index 0000000000000..b1977e7ae2ee2
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll.expected
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 7
+; RUN: opt < %s -S | FileCheck %s
+
+; Test whether UTC formats the switch-cases correctly, which requires TWO extra spaces.
+
+define i8 @testi8(i8 %x) {
+; CHECK-LABEL: define i8 @testi8(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT:    switch i8 [[X]], label %[[DEFAULT:.*]] [
+; CHECK-NEXT:      i8 0, label %[[CASE1:.*]]
+; CHECK-NEXT:      i8 1, label %[[CASE2:.*]]
+; CHECK-NEXT:      i8 2, label %[[CASE3:.*]]
+; CHECK-NEXT:      i8 3, label %[[CASE3]]
+; CHECK-NEXT:    ]
+; CHECK:       [[DEFAULT]]:
+; CHECK-NEXT:    ret i8 0
+; CHECK:       [[CASE1]]:
+; CHECK-NEXT:    ret i8 1
+; CHECK:       [[CASE2]]:
+; CHECK-NEXT:    ret i8 2
+; CHECK:       [[CASE3]]:
+; CHECK-NEXT:    ret i8 3
+;
+  switch i8 %x, label %default [
+    i8 0, label %case1
+    i8 1, label %case2
+    i8 2, label %case3
+    i8 3, label %case3
+  ]
+default:
+  ret i8 0
+case1:
+  ret i8 1
+case2:
+  ret i8 2
+case3:
+  ret i8 3
+}
+
+define i32 @testi32(i32 %x) {
+; CHECK-LABEL: define i32 @testi32(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    switch i32 [[X]], label %[[DEFAULT:.*]] [
+; CHECK-NEXT:      i32 0, label %[[CASE1:.*]]
+; CHECK-NEXT:      i32 1, label %[[CASE2:.*]]
+; CHECK-NEXT:      i32 2, label %[[CASE3:.*]]
+; CHECK-NEXT:      i32 3, label %[[CASE3]]
+; CHECK-NEXT:    ]
+; CHECK:       [[DEFAULT]]:
+; CHECK-NEXT:    ret i32 0
+; CHECK:       [[CASE1]]:
+; CHECK-NEXT:    ret i32 1
+; CHECK:       [[CASE2]]:
+; CHECK-NEXT:    ret i32 2
+; CHECK:       [[CASE3]]:
+; CHECK-NEXT:    ret i32 3
+;
+  switch i32 %x, label %default [
+    i32 0, label %case1
+    i32 1, label %case2
+    i32 2, label %case3
+    i32 3, label %case3
+  ]
+default:
+  ret i32 0
+case1:
+  ret i32 1
+case2:
+  ret i32 2
+case3:
+  ret i32 3
+}
+
+define i128 @testi128(i128 %x) {
+; CHECK-LABEL: define i128 @testi128(
+; CHECK-SAME: i128 [[X:%.*]]) {
+; CHECK-NEXT:    switch i128 [[X]], label %[[DEFAULT:.*]] [
+; CHECK-NEXT:      i128 0, label %[[CASE1:.*]]
+; CHECK-NEXT:      i128 1, label %[[CASE2:.*]]
+; CHECK-NEXT:      i128 2, label %[[CASE3:.*]]
+; CHECK-NEXT:      i128 3, label %[[CASE3]]
+; CHECK-NEXT:    ]
+; CHECK:       [[DEFAULT]]:
+; CHECK-NEXT:    ret i128 0
+; CHECK:       [[CASE1]]:
+; CHECK-NEXT:    ret i128 1
+; CHECK:       [[CASE2]]:
+; CHECK-NEXT:    ret i128 2
+; CHECK:       [[CASE3]]:
+; CHECK-NEXT:    ret i128 3
+;
+  switch i128 %x, label %default [
+    i128 0, label %case1
+    i128 1, label %case2
+    i128 2, label %case3
+    i128 3, label %case3
+  ]
+default:
+  ret i128 0
+case1:
+  ret i128 1
+case2:
+  ret i128 2
+case3:
+  ret i128 3
+}
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/switch_case.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/switch_case.test
new file mode 100644
index 0000000000000..891dbe06bbf59
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/switch_case.test
@@ -0,0 +1,3 @@
+## switch_case test checking that update_test_checks.py works correctly
+# RUN: cp -f %S/Inputs/switch_case.ll %t.ll && %update_test_checks %t.ll --version 7
+# RUN: diff -u %t.ll %S/Inputs/switch_case.ll.expected
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s b/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s
index becd9d1b55693..519edf043be5d 100644
--- a/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s
+++ b/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s
@@ -1,6 +1,12 @@
 ## This test uses TU index for type parsing in dwp and makes sure the DWARF4 type is
 ## successfully retrieved.
 
+## cd to a unique dir so we can refer to the file as just "test.dwo" in the
+## assembly test input below.
+# RUN: rm -rf %t
+# RUN: mkdir %t
+# RUN: cd %t
+
 # RUN: llvm-mc %s --split-dwarf-file=test.dwo -filetype obj -triple x86_64 -o test.o
 # RUN: llvm-dwp -e test.o -o test.dwp
 # RUN: llvm-dwarfdump test.dwp | FileCheck %s
diff --git a/llvm/unittests/IR/AbstractCallSiteTest.cpp b/llvm/unittests/IR/AbstractCallSiteTest.cpp
index ddb10911ad028..623d1b36e1c03 100644
--- a/llvm/unittests/IR/AbstractCallSiteTest.cpp
+++ b/llvm/unittests/IR/AbstractCallSiteTest.cpp
@@ -6,8 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/AbstractCallSite.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/Argument.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/SourceMgr.h"
@@ -51,5 +52,96 @@ TEST(AbstractCallSite, CallbackCall) {
   EXPECT_TRUE(ACS);
   EXPECT_TRUE(ACS.isCallbackCall());
   EXPECT_TRUE(ACS.isCallee(CallbackUse));
+  EXPECT_EQ(ACS.getCalleeUseForCallback(), *CallbackUse);
   EXPECT_EQ(ACS.getCalledFunction(), Callback);
+
+  // The callback metadata {CallbackNo, Arg0No, ..., isVarArg} = {1, -1, true}.
+  EXPECT_EQ(ACS.getCallArgOperandNoForCallee(), 1);
+  // Although the callback metadata only specifies ONE unfixed argument number,
+  // the callback callee is vararg, so the third call site argument is also
+  // counted as an argument of the callback.
+  EXPECT_EQ(ACS.getNumArgOperands(), 2u);
+  Argument *Param0 = Callback->getArg(0), *Param1 = Callback->getArg(1);
+  ASSERT_TRUE(Param0 && Param1);
+  EXPECT_EQ(ACS.getCallArgOperandNo(*Param0), -1);
+  EXPECT_EQ(ACS.getCallArgOperandNo(*Param1), 2);
+}
+
+TEST(AbstractCallSite, DirectCall) {
+  LLVMContext C;
+
+  const char *IR = "declare void @bar(i32 %x, i32 %y)\n"
+                   "define void @foo() {\n"
+                   "  call void @bar(i32 1, i32 2)\n"
+                   "  ret void\n"
+                   "}\n";
+
+  std::unique_ptr<Module> M = parseIR(C, IR);
+  ASSERT_TRUE(M);
+
+  Function *Callee = M->getFunction("bar");
+  ASSERT_NE(Callee, nullptr);
+
+  const Use *DirectCallUse = Callee->getSingleUndroppableUse();
+  ASSERT_NE(DirectCallUse, nullptr);
+
+  AbstractCallSite ACS(DirectCallUse);
+  EXPECT_TRUE(ACS);
+  EXPECT_TRUE(ACS.isDirectCall());
+  EXPECT_TRUE(ACS.isCallee(DirectCallUse));
+  EXPECT_EQ(ACS.getCalledFunction(), Callee);
+  EXPECT_EQ(ACS.getNumArgOperands(), 2u);
+  Argument *ArgX = Callee->getArg(0);
+  ASSERT_NE(ArgX, nullptr);
+  Value *CAO1 = ACS.getCallArgOperand(*ArgX);
+  Value *CAO2 = ACS.getCallArgOperand(0);
+  ASSERT_NE(CAO2, nullptr);
+  // The two call arg operands should be the same object, since they are both
+  // the first argument of the call.
+  EXPECT_EQ(CAO2, CAO1);
+
+  ConstantInt *FirstArgInt = dyn_cast<ConstantInt>(CAO2);
+  ASSERT_NE(FirstArgInt, nullptr);
+  EXPECT_EQ(FirstArgInt->getZExtValue(), 1ull);
+
+  EXPECT_EQ(ACS.getCallArgOperandNo(*ArgX), 0);
+  EXPECT_EQ(ACS.getCallArgOperandNo(0), 0);
+  EXPECT_EQ(ACS.getCallArgOperandNo(1), 1);
+}
+
+TEST(AbstractCallSite, IndirectCall) {
+  LLVMContext C;
+
+  const char *IR = "define void @foo(ptr %0) {\n"
+                   "  call void %0(i32 1, i32 2)\n"
+                   "  ret void\n"
+                   "}\n";
+
+  std::unique_ptr<Module> M = parseIR(C, IR);
+  ASSERT_TRUE(M);
+
+  Function *Fun = M->getFunction("foo");
+  ASSERT_NE(Fun, nullptr);
+
+  Argument *ArgAsCallee = Fun->getArg(0);
+  ASSERT_NE(ArgAsCallee, nullptr);
+
+  const Use *IndCallUse = ArgAsCallee->getSingleUndroppableUse();
+  ASSERT_NE(IndCallUse, nullptr);
+
+  AbstractCallSite ACS(IndCallUse);
+  EXPECT_TRUE(ACS);
+  EXPECT_TRUE(ACS.isIndirectCall());
+  EXPECT_TRUE(ACS.isCallee(IndCallUse));
+  EXPECT_EQ(ACS.getCalledFunction(), nullptr);
+  EXPECT_EQ(ACS.getCalledOperand(), ArgAsCallee);
+  EXPECT_EQ(ACS.getNumArgOperands(), 2u);
+  Value *CalledOperand = ACS.getCallArgOperand(0);
+  ASSERT_NE(CalledOperand, nullptr);
+  ConstantInt *FirstArgInt = dyn_cast<ConstantInt>(CalledOperand);
+  ASSERT_NE(FirstArgInt, nullptr);
+  EXPECT_EQ(FirstArgInt->getZExtValue(), 1ull);
+
+  EXPECT_EQ(ACS.getCallArgOperandNo(0), 0);
+  EXPECT_EQ(ACS.getCallArgOperandNo(1), 1);
 }
diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py
index a5e3c39bfdecd..8cd200c93a482 100644
--- a/llvm/utils/UpdateTestChecks/common.py
+++ b/llvm/utils/UpdateTestChecks/common.py
@@ -29,6 +29,7 @@
 'none' and 'all'. 'smart' is the default.
 5: Basic block labels are matched by FileCheck expressions
 6: The semantics of TBAA checks has been incorporated in the check lines.
+7: Indent switch-cases correctly.
 """
 
 DEFAULT_VERSION = 6
@@ -606,6 +607,7 @@ def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
 
 DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")
 IS_DEBUG_RECORD_RE = re.compile(r"^(\s+)#dbg_")
+IS_SWITCH_CASE_RE = re.compile(r"^\s+i\d+ \d+, label %\w+")
 SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
 SCRUB_WHITESPACE_RE = re.compile(r"(?!^(| \w))[ \t]+", flags=re.M)
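
Taken together, the new IS_SWITCH_CASE_RE above and the update_test_checks.py hunk further below implement the version-7 indentation rule. A minimal standalone sketch of the resulting behavior (an editorial illustration, not part of the patch; `reindent` is a hypothetical name):

import re

# The regexes involved, copied from UpdateTestChecks/common.py above.
IS_DEBUG_RECORD_RE = re.compile(r"^(\s+)#dbg_")
IS_SWITCH_CASE_RE = re.compile(r"^\s+i\d+ \d+, label %\w+")
SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")


def reindent(input_line, version):
    # Mirrors the update_test_checks.py change below: switch cases (and debug
    # records) get 4 leading spaces under --version 7, everything else 2.
    indent = (
        " " * 4
        if (
            IS_DEBUG_RECORD_RE.match(input_line)
            or (version > 6 and IS_SWITCH_CASE_RE.match(input_line))
        )
        else " " * 2
    )
    return SCRUB_LEADING_WHITESPACE_RE.sub(indent, input_line)


assert reindent("  i8 0, label %case1", version=7) == "    i8 0, label %case1"
assert reindent("  i8 0, label %case1", version=6) == "  i8 0, label %case1"
assert reindent("    ret i8 0", version=7) == "  ret i8 0"

The version gate keeps older autogenerated tests byte-stable: only tests regenerated with --version 7 pick up the extra two spaces, which is what the switch_case.ll.expected file above encodes.
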
""" DEFAULT_VERSION = 6 @@ -606,6 +607,7 @@ def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False): DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)") IS_DEBUG_RECORD_RE = re.compile(r"^(\s+)#dbg_") +IS_SWITCH_CASE_RE = re.compile(r"^\s+i\d+ \d+, label %\w+") SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)") SCRUB_WHITESPACE_RE = re.compile(r"(?!^(| \w))[ \t]+", flags=re.M) diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py index f88314547bb3f..9fba96a1471a0 100644 --- a/llvm/utils/lit/lit/TestRunner.py +++ b/llvm/utils/lit/lit/TestRunner.py @@ -945,7 +945,7 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper): path = ( cmd_shenv.env["PATH"] if "PATH" in cmd_shenv.env else shenv.env["PATH"] ) - executable = lit.util.which(args[0], shenv.env["PATH"]) + executable = lit.util.which(args[0], path) if not executable: raise InternalShellError(j, "%r: command not found" % args[0]) diff --git a/llvm/utils/lit/tests/Inputs/shtest-env-path/lit.cfg b/llvm/utils/lit/tests/Inputs/shtest-env-path/lit.cfg new file mode 100644 index 0000000000000..36517f998530b --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-env-path/lit.cfg @@ -0,0 +1,8 @@ +import lit.formats + +config.name = "shtest-env-path" +config.suffixes = [".txt"] +config.test_format = lit.formats.ShTest() +config.test_source_root = None +config.test_exec_root = None +config.substitutions.append(("%{python}", '"%s"' % (sys.executable))) diff --git a/llvm/utils/lit/tests/Inputs/shtest-env-path/path.txt b/llvm/utils/lit/tests/Inputs/shtest-env-path/path.txt new file mode 100644 index 0000000000000..b36e861ec5632 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-env-path/path.txt @@ -0,0 +1,8 @@ +## Tests env command for setting the PATH variable. + +## Check that test.sh can be found using the configured PATH. +# +# RUN: env PATH=%S test.sh | FileCheck --check-prefix=CHECK %s +# + +# CHECK: TEST-ENV-PATH-123 diff --git a/llvm/utils/lit/tests/Inputs/shtest-env-path/test.sh b/llvm/utils/lit/tests/Inputs/shtest-env-path/test.sh new file mode 100755 index 0000000000000..a1e46fc210d49 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-env-path/test.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +echo "TEST-ENV-PATH-123" + diff --git a/llvm/utils/lit/tests/shtest-env-path.py b/llvm/utils/lit/tests/shtest-env-path.py new file mode 100644 index 0000000000000..bf459ae53fbc0 --- /dev/null +++ b/llvm/utils/lit/tests/shtest-env-path.py @@ -0,0 +1,13 @@ +## Tests env command for setting the PATH variable. + +# The test is using /bin/sh. Limit to system known to have /bin/sh. +# REQUIRES: system-linux + +# RUN: %{lit} -a -v %{inputs}/shtest-env-path/path.txt \ +# RUN: | FileCheck -match-full-lines %s +# +# END. 
diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt
index 3d07b16cac661..aef7c0987fda7 100644
--- a/llvm/utils/profcheck-xfail.txt
+++ b/llvm/utils/profcheck-xfail.txt
@@ -550,6 +550,7 @@ tools/UpdateTestChecks/update_test_checks/stable_ir_values5.test
 tools/UpdateTestChecks/update_test_checks/stable_ir_values6.test
 tools/UpdateTestChecks/update_test_checks/stable_ir_values_funcs.test
 tools/UpdateTestChecks/update_test_checks/stable_ir_values.test
+tools/UpdateTestChecks/update_test_checks/switch_case.test
 tools/UpdateTestChecks/update_test_checks/tbaa-semantics-checks.test
 tools/UpdateTestChecks/update_test_checks/various_ir_values_dbgrecords.test
 Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll
diff --git a/llvm/utils/update_test_checks.py b/llvm/utils/update_test_checks.py
index 3b562fbc54f78..42227b20fca76 100755
--- a/llvm/utils/update_test_checks.py
+++ b/llvm/utils/update_test_checks.py
@@ -260,9 +260,17 @@ def update_test(ti: common.TestInfo):
                 skip_same_checks=dropped_previous_line,
             ):
                 # This input line of the function body will go as-is into the output.
-                # Except make leading whitespace uniform: 2 spaces. 4 for debug records.
+                # Except make leading whitespace uniform: 2 spaces. 4 for debug records/switch cases.
                 indent = (
-                    "  " if not common.IS_DEBUG_RECORD_RE.match(input_line) else "    "
+                    " " * 4
+                    if (
+                        common.IS_DEBUG_RECORD_RE.match(input_line)
+                        or (
+                            ti.args.version > 6
+                            and common.IS_SWITCH_CASE_RE.match(input_line)
+                        )
+                    )
+                    else " " * 2
                 )
                 input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(indent, input_line)
                 output_lines.append(input_line)
diff --git a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
index fadd3fc10bfc4..66174ce0f7928 100644
--- a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
+++ b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
@@ -77,7 +77,7 @@ def ConditionOp : SCF_Op<"condition", [
 //===----------------------------------------------------------------------===//
 
 def ExecuteRegionOp : SCF_Op<"execute_region", [
-    DeclareOpInterfaceMethods<RegionBranchOpInterface>]> {
+    DeclareOpInterfaceMethods<RegionBranchOpInterface>, RecursiveMemoryEffects]> {
   let summary = "operation that executes its region exactly once";
   let description = [{
     The `scf.execute_region` operation is used to allow multiple blocks within SCF
diff --git a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp
index 0fe72394b61d6..9e46b7d78baca 100644
--- a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp
+++ b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp
@@ -313,25 +313,58 @@ struct DivOpConversion : public OpConversionPattern<complex::DivOp> {
 struct ExpOpConversion : public OpConversionPattern<complex::ExpOp> {
   using OpConversionPattern::OpConversionPattern;
 
+  // exp(x+I*y) = exp(x)*(cos(y)+I*sin(y))
+  // Handle special cases as the StableHLO implementation does:
+  // 1. When y == 0, set imag(exp(z)) = 0
+  // 2. When exp(x) == inf, use exp(x/2)*(cos(y)+I*sin(y))*exp(x/2)
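+  // The split works because exp(x/2) is still finite when exp(x) overflows,
+  // and exp(x/2)*cos(y) stays finite since |cos(y)| <= 1. The second multiply
+  // by exp(x/2) then rounds to +/-inf only when the true magnitude really
+  // overflows, and it avoids the inf * 0 = NaN that exp(x)*cos(y) would
+  // produce at the zeros of cos and sin.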
diff --git a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
index fadd3fc10bfc4..66174ce0f7928 100644
--- a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
+++ b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
@@ -77,7 +77,7 @@ def ConditionOp : SCF_Op<"condition", [
 //===----------------------------------------------------------------------===//
 
 def ExecuteRegionOp : SCF_Op<"execute_region", [
-    DeclareOpInterfaceMethods<RegionBranchOpInterface>]> {
+    DeclareOpInterfaceMethods<RegionBranchOpInterface>, RecursiveMemoryEffects]> {
   let summary = "operation that executes its region exactly once";
   let description = [{
     The `scf.execute_region` operation is used to allow multiple blocks within SCF
diff --git a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp
index 0fe72394b61d6..9e46b7d78baca 100644
--- a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp
+++ b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp
@@ -313,25 +313,53 @@ struct DivOpConversion : public OpConversionPattern<complex::DivOp> {
 struct ExpOpConversion : public OpConversionPattern<complex::ExpOp> {
   using OpConversionPattern<complex::ExpOp>::OpConversionPattern;
 
+  // exp(x+I*y) = exp(x)*(cos(y)+I*sin(y))
+  // Handle special cases as the StableHLO implementation does:
+  // 1. When y == 0, set imag(exp(z)) = 0.
+  // 2. When exp(x) == inf, use exp(x/2)*(cos(y)+I*sin(y))*exp(x/2).
   LogicalResult matchAndRewrite(complex::ExpOp op, OpAdaptor adaptor,
                                 ConversionPatternRewriter &rewriter) const override {
     auto loc = op.getLoc();
     auto type = cast<ComplexType>(adaptor.getComplex().getType());
-    auto elementType = cast<FloatType>(type.getElementType());
-    arith::FastMathFlagsAttr fmf = op.getFastMathFlagsAttr();
-
-    Value real =
-        complex::ReOp::create(rewriter, loc, elementType, adaptor.getComplex());
-    Value imag =
-        complex::ImOp::create(rewriter, loc, elementType, adaptor.getComplex());
-    Value expReal = math::ExpOp::create(rewriter, loc, real, fmf.getValue());
-    Value cosImag = math::CosOp::create(rewriter, loc, imag, fmf.getValue());
+    auto ET = cast<FloatType>(type.getElementType());
+    arith::FastMathFlags fmf = op.getFastMathFlagsAttr().getValue();
+    const auto &floatSemantics = ET.getFloatSemantics();
+    ImplicitLocOpBuilder b(loc, rewriter);
+
+    Value x = complex::ReOp::create(b, ET, adaptor.getComplex());
+    Value y = complex::ImOp::create(b, ET, adaptor.getComplex());
+    Value zero = arith::ConstantOp::create(b, ET, b.getZeroAttr(ET));
+    Value half = arith::ConstantOp::create(b, ET, b.getFloatAttr(ET, 0.5));
+    Value inf = arith::ConstantOp::create(
+        b, ET, b.getFloatAttr(ET, APFloat::getInf(floatSemantics)));
+
+    Value exp = math::ExpOp::create(b, x, fmf);
+    Value xHalf = arith::MulFOp::create(b, x, half, fmf);
+    Value expHalf = math::ExpOp::create(b, xHalf, fmf);
+    Value cos = math::CosOp::create(b, y, fmf);
+    Value sin = math::SinOp::create(b, y, fmf);
+
+    Value expIsInf =
+        arith::CmpFOp::create(b, arith::CmpFPredicate::OEQ, exp, inf, fmf);
+    Value yIsZero =
+        arith::CmpFOp::create(b, arith::CmpFPredicate::OEQ, y, zero);
+
+    // Real part: select between exp(x)*cos(y) and exp(x/2)*cos(y)*exp(x/2).
+    Value realNormal = arith::MulFOp::create(b, exp, cos, fmf);
+    Value expHalfCos = arith::MulFOp::create(b, expHalf, cos, fmf);
+    Value realOverflow = arith::MulFOp::create(b, expHalfCos, expHalf, fmf);
     Value resultReal =
-        arith::MulFOp::create(rewriter, loc, expReal, cosImag, fmf.getValue());
-    Value sinImag = math::SinOp::create(rewriter, loc, imag, fmf.getValue());
-    Value resultImag =
-        arith::MulFOp::create(rewriter, loc, expReal, sinImag, fmf.getValue());
+        arith::SelectOp::create(b, expIsInf, realOverflow, realNormal);
+
+    // Imaginary part: if y == 0, return 0; else select between exp(x)*sin(y)
+    // and exp(x/2)*sin(y)*exp(x/2).
+    Value imagNormal = arith::MulFOp::create(b, exp, sin, fmf);
+    Value expHalfSin = arith::MulFOp::create(b, expHalf, sin, fmf);
+    Value imagOverflow = arith::MulFOp::create(b, expHalfSin, expHalf, fmf);
+    Value imagNonZero =
+        arith::SelectOp::create(b, expIsInf, imagOverflow, imagNormal);
+    Value resultImag = arith::SelectOp::create(b, yIsZero, zero, imagNonZero);
 
     rewriter.replaceOpWithNewOp<complex::CreateOp>(op, type, resultReal, resultImag);
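
The overflow branch above relies on exp(x) = exp(x/2) * exp(x/2): multiplying exp(x/2) by cos(y) or sin(y) before the second exp(x/2) factor lets a small trigonometric term pull the product back into range, and the explicit y == 0 select avoids inf * sin(0) = inf * 0 = NaN. A quick float32 sanity check of the same scheme (a sketch only; numpy is assumed here purely for its float32 semantics, and the inputs match the integration test added at the end of this patch):

import numpy as np

def complex_exp_f32(x, y):
    x, y = np.float32(x), np.float32(y)
    with np.errstate(over="ignore"):
        e = np.exp(x)                      # may overflow to inf
        h = np.exp(np.float32(0.5) * x)    # exp(x/2), still finite
        cos, sin = np.cos(y), np.sin(y)
        re = h * cos * h if np.isinf(e) else e * cos
        if y == 0:
            im = np.float32(0.0)           # avoid inf * 0 = nan
        elif np.isinf(e):
            im = h * sin * h               # compensated product
        else:
            im = e * sin
    return re, im

print(complex_exp_f32(1.0, 2.0))                  # approx (-1.1312, 2.4717)
print(complex_exp_f32(500.0, 0.0))                # (inf, 0.0), not (inf, nan)
print(complex_exp_f32(90.0238094, 5.900613e-39))  # (inf, approx 7.3746)
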
diff --git a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
index dec62f92c7b2e..7a82236b0656e 100644
--- a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
+++ b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
@@ -211,11 +211,25 @@ func.func @complex_exp(%arg: complex<f32>) -> complex<f32> {
 }
 // CHECK: %[[REAL:.*]] = complex.re %[[ARG]] : complex<f32>
 // CHECK: %[[IMAG:.*]] = complex.im %[[ARG]] : complex<f32>
-// CHECK-DAG: %[[COS_IMAG:.*]] = math.cos %[[IMAG]] : f32
+// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK-DAG: %[[HALF:.*]] = arith.constant 5.000000e-01 : f32
+// CHECK-DAG: %[[INF:.*]] = arith.constant 0x7F800000 : f32
 // CHECK-DAG: %[[EXP_REAL:.*]] = math.exp %[[REAL]] : f32
-// CHECK-DAG: %[[RESULT_REAL:.]] = arith.mulf %[[EXP_REAL]], %[[COS_IMAG]] : f32
+// CHECK-DAG: %[[REAL_HALF:.*]] = arith.mulf %[[REAL]], %[[HALF]] : f32
+// CHECK-DAG: %[[EXP_HALF:.*]] = math.exp %[[REAL_HALF]] : f32
+// CHECK-DAG: %[[COS_IMAG:.*]] = math.cos %[[IMAG]] : f32
 // CHECK-DAG: %[[SIN_IMAG:.*]] = math.sin %[[IMAG]] : f32
-// CHECK-DAG: %[[RESULT_IMAG:.*]] = arith.mulf %[[EXP_REAL]], %[[SIN_IMAG]] : f32
+// CHECK-DAG: %[[IS_INF:.*]] = arith.cmpf oeq, %[[EXP_REAL]], %[[INF]] : f32
+// CHECK-DAG: %[[IS_IMAG_ZERO:.*]] = arith.cmpf oeq, %[[IMAG]], %[[ZERO]] : f32
+// CHECK-DAG: %[[REAL_NORMAL:.*]] = arith.mulf %[[EXP_REAL]], %[[COS_IMAG]] : f32
+// CHECK-DAG: %[[EXP_HALF_COS:.*]] = arith.mulf %[[EXP_HALF]], %[[COS_IMAG]] : f32
+// CHECK-DAG: %[[REAL_OVERFLOW:.*]] = arith.mulf %[[EXP_HALF_COS]], %[[EXP_HALF]] : f32
+// CHECK: %[[RESULT_REAL:.*]] = arith.select %[[IS_INF]], %[[REAL_OVERFLOW]], %[[REAL_NORMAL]] : f32
+// CHECK-DAG: %[[IMAG_NORMAL:.*]] = arith.mulf %[[EXP_REAL]], %[[SIN_IMAG]] : f32
+// CHECK-DAG: %[[EXP_HALF_SIN:.*]] = arith.mulf %[[EXP_HALF]], %[[SIN_IMAG]] : f32
+// CHECK-DAG: %[[IMAG_OVERFLOW:.*]] = arith.mulf %[[EXP_HALF_SIN]], %[[EXP_HALF]] : f32
+// CHECK-DAG: %[[IMAG_NONZERO:.*]] = arith.select %[[IS_INF]], %[[IMAG_OVERFLOW]], %[[IMAG_NORMAL]] : f32
+// CHECK: %[[RESULT_IMAG:.*]] = arith.select %[[IS_IMAG_ZERO]], %[[ZERO]], %[[IMAG_NONZERO]] : f32
 // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex<f32>
 // CHECK: return %[[RESULT]] : complex<f32>
@@ -832,11 +846,25 @@ func.func @complex_exp_with_fmf(%arg: complex<f32>) -> complex<f32> {
 }
 // CHECK: %[[REAL:.*]] = complex.re %[[ARG]] : complex<f32>
 // CHECK: %[[IMAG:.*]] = complex.im %[[ARG]] : complex<f32>
-// CHECK-DAG: %[[COS_IMAG:.*]] = math.cos %[[IMAG]] fastmath<nnan,contract> : f32
+// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK-DAG: %[[HALF:.*]] = arith.constant 5.000000e-01 : f32
+// CHECK-DAG: %[[INF:.*]] = arith.constant 0x7F800000 : f32
 // CHECK-DAG: %[[EXP_REAL:.*]] = math.exp %[[REAL]] fastmath<nnan,contract> : f32
-// CHECK-DAG: %[[RESULT_REAL:.]] = arith.mulf %[[EXP_REAL]], %[[COS_IMAG]] fastmath<nnan,contract> : f32
+// CHECK-DAG: %[[REAL_HALF:.*]] = arith.mulf %[[REAL]], %[[HALF]] fastmath<nnan,contract> : f32
+// CHECK-DAG: %[[EXP_HALF:.*]] = math.exp %[[REAL_HALF]] fastmath<nnan,contract> : f32
+// CHECK-DAG: %[[COS_IMAG:.*]] = math.cos %[[IMAG]] fastmath<nnan,contract> : f32
 // CHECK-DAG: %[[SIN_IMAG:.*]] = math.sin %[[IMAG]] fastmath<nnan,contract> : f32
-// CHECK-DAG: %[[RESULT_IMAG:.*]] = arith.mulf %[[EXP_REAL]], %[[SIN_IMAG]] fastmath<nnan,contract> : f32
+// CHECK-DAG: %[[IS_INF:.*]] = arith.cmpf oeq, %[[EXP_REAL]], %[[INF]] fastmath<nnan,contract> : f32
+// CHECK-DAG: %[[IS_IMAG_ZERO:.*]] = arith.cmpf oeq, %[[IMAG]], %[[ZERO]] : f32
+// CHECK-DAG: %[[REAL_NORMAL:.*]] = arith.mulf %[[EXP_REAL]], %[[COS_IMAG]] fastmath<nnan,contract> : f32
+// CHECK-DAG: %[[EXP_HALF_COS:.*]] = arith.mulf %[[EXP_HALF]], %[[COS_IMAG]] fastmath<nnan,contract> : f32
+// CHECK-DAG: %[[REAL_OVERFLOW:.*]] = arith.mulf %[[EXP_HALF_COS]], %[[EXP_HALF]] fastmath<nnan,contract> : f32
+// CHECK: %[[RESULT_REAL:.*]] = arith.select %[[IS_INF]], %[[REAL_OVERFLOW]], %[[REAL_NORMAL]] : f32
+// CHECK-DAG: %[[IMAG_NORMAL:.*]] = arith.mulf %[[EXP_REAL]], %[[SIN_IMAG]] fastmath<nnan,contract> : f32
+// CHECK-DAG: %[[EXP_HALF_SIN:.*]] = arith.mulf %[[EXP_HALF]], %[[SIN_IMAG]] fastmath<nnan,contract> : f32
+// CHECK-DAG: %[[IMAG_OVERFLOW:.*]] = arith.mulf %[[EXP_HALF_SIN]], %[[EXP_HALF]] fastmath<nnan,contract> : f32
+// CHECK-DAG: %[[IMAG_NONZERO:.*]] = arith.select %[[IS_INF]], %[[IMAG_OVERFLOW]], %[[IMAG_NORMAL]] : f32
+// CHECK: %[[RESULT_IMAG:.*]] = arith.select %[[IS_IMAG_ZERO]], %[[ZERO]], %[[IMAG_NONZERO]] : f32
 // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex<f32>
 // CHECK: return %[[RESULT]] : complex<f32>
diff --git a/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-memoryeffect-interface.mlir b/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-memoryeffect-interface.mlir
index 40a57b90c6e99..e8bb0c0f2eff6 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-memoryeffect-interface.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-memoryeffect-interface.mlir
@@ -156,3 +156,24 @@ func.func @manual_deallocation(%c: i1, %f: f32, %idx: index) -> f32 {
 // CHECK: cf.assert %[[true]], "expected that the block does not have ownership"
 // CHECK: memref.dealloc %[[manual_alloc]]
 // CHECK: bufferization.dealloc (%[[managed_alloc]] : memref<5xf32>) if (%[[true]])
+
+// -----
+
+// CHECK-LABEL: func.func private @properly_creates_deallocations_in_execute_region(
+// CHECK: %[[true:.*]] = arith.constant true
+// CHECK: scf.execute_region no_inline {
+// CHECK: %[[alloc:.*]] = memref.alloc() {alignment = 64 : i64} : memref<1x63x378x16xui8>
+// CHECK: bufferization.dealloc (%[[alloc]] : memref<1x63x378x16xui8>) if (%[[true]])
+
+func.func private @properly_creates_deallocations_in_execute_region(%arg1: memref<1x16x252x380xui8>) -> (memref<1x250x378x16xui8>) {
+  %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x250x378x16xui8>
+  scf.execute_region no_inline {
+    %subview = memref.subview %arg1[0, 0, 0, 0] [1, 16, 65, 380] [1, 1, 1, 1] : memref<1x16x252x380xui8> to memref<1x16x65x380xui8, strided<[1532160, 95760, 380, 1]>>
+    %alloc_3 = memref.alloc() {alignment = 64 : i64} : memref<1x63x378x16xui8>
+    test.buffer_based in(%subview: memref<1x16x65x380xui8, strided<[1532160, 95760, 380, 1]>>) out(%alloc_3: memref<1x63x378x16xui8>)
+    %subview_7 = memref.subview %alloc[0, 0, 0, 0] [1, 63, 378, 16] [1, 1, 1, 1] : memref<1x250x378x16xui8> to memref<1x63x378x16xui8, strided<[1512000, 6048, 16, 1]>>
+    test.copy(%alloc_3, %subview_7) : (memref<1x63x378x16xui8>, memref<1x63x378x16xui8, strided<[1512000, 6048, 16, 1]>>)
+    scf.yield
+  }
+  return %alloc : memref<1x250x378x16xui8>
+}
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
index d5f834bce9b83..8db1ebb87a1e5 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
@@ -381,15 +381,19 @@ func.func private @execute_region_test(%t1 : tensor<?xf32>)
 // -----
 
 // CHECK-LABEL: func @no_inline_execute_region_not_canonicalized
-func.func @no_inline_execute_region_not_canonicalized() {
-  %c = arith.constant 42 : i32
-  // CHECK: scf.execute_region
-  // CHECK-SAME: no_inline
-  %v = scf.execute_region -> i32 no_inline {
-    scf.yield %c : i32
+module {
+  func.func private @foo() -> ()
+  func.func @no_inline_execute_region_not_canonicalized() {
+    %c = arith.constant 42 : i32
+    // CHECK: scf.execute_region
+    // CHECK-SAME: no_inline
+    %v = scf.execute_region -> i32 no_inline {
+      func.call @foo() : () -> ()
+      scf.yield %c : i32
+    }
+    // CHECK: return
+    return
   }
-  // CHECK: return
-  return
 }
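
Both bufferization test changes above follow from tagging scf.execute_region with RecursiveMemoryEffects earlier in this patch: the op now derives its effects from its body, so the deallocation pass can reason about allocations inside the region, while a no_inline region whose body is pure and whose results are unused becomes trivially dead, which appears to be why the canonicalization test now keeps a func.call inside the region. A toy Python model of recursive effect inference (this illustrates the concept only, not MLIR's actual API):

# Toy model: an op either states its own effects or, like
# scf.execute_region with RecursiveMemoryEffects, inherits the effects
# of the ops nested in its region.
class Op:
    def __init__(self, name, effects=(), body=(), recursive=False):
        self.name, self.effects = name, set(effects)
        self.body, self.recursive = list(body), recursive

    def all_effects(self):
        found = set(self.effects)
        if self.recursive:
            for child in self.body:
                found |= child.all_effects()
        return found

pure_region = Op("scf.execute_region", recursive=True,
                 body=[Op("arith.constant")])
live_region = Op("scf.execute_region", recursive=True,
                 body=[Op("func.call", effects={"unknown"})])

# An effect-free region with unused results is safe to fold away, hence
# the func.call added to keep the canonicalization test's op alive.
assert pure_region.all_effects() == set()
assert live_region.all_effects() == {"unknown"}
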
diff --git a/mlir/test/Integration/Dialect/Complex/CPU/correctness.mlir b/mlir/test/Integration/Dialect/Complex/CPU/correctness.mlir
index 1bcef0a0df316..ea587e92674d7 100644
--- a/mlir/test/Integration/Dialect/Complex/CPU/correctness.mlir
+++ b/mlir/test/Integration/Dialect/Complex/CPU/correctness.mlir
@@ -49,6 +49,11 @@ func.func @conj(%arg: complex<f32>) -> complex<f32> {
   func.return %conj : complex<f32>
 }
 
+func.func @exp(%arg: complex<f32>) -> complex<f32> {
+  %exp = complex.exp %arg : complex<f32>
+  func.return %exp : complex<f32>
+}
+
 // %input contains pairs of lhs, rhs, i.e. [lhs_0, rhs_0, lhs_1, rhs_1,...]
 func.func @test_binary(%input: tensor<?xcomplex<f32>>,
                        %func: (complex<f32>, complex<f32>) -> complex<f32>) {
@@ -353,5 +358,32 @@ func.func @entry() {
   call @test_element_f64(%abs_test_cast, %abs_func)
     : (tensor<?xcomplex<f64>>, (complex<f64>) -> f64) -> ()
 
+  // complex.exp test
+  %exp_test = arith.constant dense<[
+    (1.0, 2.0),
+    // CHECK: -1.1312
+    // CHECK-NEXT: 2.4717
+
+    // The first special case is overflow of exp(real_part): computed directly,
+    // the imaginary part would be inf * 0 = NaN, which is incorrect.
+    (500.0, 0.0),
+    // CHECK-NEXT: inf
+    // CHECK-NOT: nan
+    // CHECK-NEXT: 0
+
+    // Here the overflow of exp(real_part) is compensated by a sin(imag_part)
+    // close to zero, yielding a finite imaginary part.
+    (90.0238094, 5.900613e-39)
+    // CHECK-NEXT: inf
+    // CHECK-NOT: inf
+    // CHECK-NEXT: 7.3746
+  ]> : tensor<3xcomplex<f32>>
+  %exp_test_cast = tensor.cast %exp_test
+    : tensor<3xcomplex<f32>> to tensor<?xcomplex<f32>>
+
+  %exp_func = func.constant @exp : (complex<f32>) -> complex<f32>
+  call @test_unary(%exp_test_cast, %exp_func)
+    : (tensor<?xcomplex<f32>>, (complex<f32>) -> complex<f32>) -> ()
+
   func.return
 }