jgm · aantich · Nov 9, 2025 · Nov 9, 2025 · Nov 9, 2025
diff --git a/MANUAL.txt b/MANUAL.txt
@@ -273,6 +273,7 @@ header when requesting a document from a URL:
     - `opml` ([OPML])
     - `org` ([Emacs Org mode])
     - `pod` (Perl's [Plain Old Documentation])
+    - `pptx` ([PowerPoint])
     - `ris` ([RIS] bibliography)
     - `rtf` ([Rich Text Format])
     - `rst` ([reStructuredText])

diff --git a/pandoc.cabal b/pandoc.cabal
@@ -610,6 +610,7 @@ library
                    Text.Pandoc.Readers.TikiWiki,
                    Text.Pandoc.Readers.Txt2Tags,
                    Text.Pandoc.Readers.Docx,
+                   Text.Pandoc.Readers.Pptx,
                    Text.Pandoc.Readers.ODT,
                    Text.Pandoc.Readers.EPUB,
                    Text.Pandoc.Readers.Muse,
@@ -718,6 +719,11 @@ library
                    Text.Pandoc.Readers.Docx.Util,
                    Text.Pandoc.Readers.Docx.Symbols,
                    Text.Pandoc.Readers.Docx.Fields,
+                   Text.Pandoc.Readers.OOXML.Shared,
+                   Text.Pandoc.Readers.Pptx.Parse,
+                   Text.Pandoc.Readers.Pptx.Shapes,
+                   Text.Pandoc.Readers.Pptx.Slides,
+                   Text.Pandoc.Readers.Pptx.SmartArt,
                    Text.Pandoc.Readers.HTML.Parsing,
                    Text.Pandoc.Readers.HTML.Table,
                    Text.Pandoc.Readers.HTML.TagCategories,
@@ -854,6 +860,7 @@ test-suite test-pandoc
                   Tests.Readers.RST
                   Tests.Readers.RTF
                   Tests.Readers.Docx
+                  Tests.Readers.Pptx
                   Tests.Readers.ODT
                   Tests.Readers.Txt2Tags
                   Tests.Readers.EPUB

diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
@@ -26,6 +26,7 @@ module Text.Pandoc.Readers
     Reader (..)
   , readers
   , readDocx
+  , readPptx
   , readODT
   , readMarkdown
   , readCommonMark
@@ -87,6 +88,7 @@ import Text.Pandoc.Readers.Markdown
 import Text.Pandoc.Readers.Creole
 import Text.Pandoc.Readers.DocBook
 import Text.Pandoc.Readers.Docx
+import Text.Pandoc.Readers.Pptx
 import Text.Pandoc.Readers.DokuWiki
 import Text.Pandoc.Readers.EPUB
 import Text.Pandoc.Readers.FB2
@@ -157,6 +159,7 @@ readers = [("native"       , TextReader readNative)
           ,("twiki"        , TextReader readTWiki)
           ,("tikiwiki"     , TextReader readTikiWiki)
           ,("docx"         , ByteStringReader readDocx)
+          ,("pptx"         , ByteStringReader readPptx)
           ,("odt"          , ByteStringReader readODT)
           ,("t2t"          , TextReader readTxt2Tags)
           ,("epub"         , ByteStringReader readEPUB)

diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -24,51 +24,11 @@ module Text.Pandoc.Readers.Docx.Util (
                                       , extractChildren
                                       ) where
 
-import qualified Data.Text as T
-import Data.Text (Text)
-import Text.Pandoc.XML.Light
-import qualified Data.Map as M
 import Data.List (partition)
-
-type NameSpaces = M.Map Text Text
-
-elemToNameSpaces :: Element -> NameSpaces
-elemToNameSpaces = foldr (\(Attr qn val) ->
-                             case qn of
-                               QName s _ (Just "xmlns") -> M.insert s val
-                               _ -> id) mempty . elAttribs
-
-elemName :: NameSpaces -> Text -> Text -> QName
-elemName ns prefix name =
-  QName name (M.lookup prefix ns)
-             (if T.null prefix then Nothing else Just prefix)
-
-isElem :: NameSpaces -> Text -> Text -> Element -> Bool
-isElem ns prefix name element =
-  let ns' = ns <> elemToNameSpaces element
-  in qName (elName element) == name &&
-     qURI (elName element) == M.lookup prefix ns'
-
-findChildByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
-findChildByName ns pref name el =
-  let ns' = ns <> elemToNameSpaces el
-  in  findChild (elemName ns' pref name) el
-
-findChildrenByName :: NameSpaces -> Text -> Text -> Element -> [Element]
-findChildrenByName ns pref name el =
-  let ns' = ns <> elemToNameSpaces el
-  in  findChildren (elemName ns' pref name) el
-
--- | Like 'findChildrenByName', but searches descendants.
-findElementByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
-findElementByName ns pref name el =
-  let ns' = ns <> elemToNameSpaces el
-  in  findElement (elemName ns' pref name) el
-
-findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text
-findAttrByName ns pref name el =
-  let ns' = ns <> elemToNameSpaces el
-  in  findAttr (elemName ns' pref name) el
+import Text.Pandoc.XML.Light
+import Text.Pandoc.Readers.OOXML.Shared
+  (NameSpaces, elemName, isElem, elemToNameSpaces,
+   findChildByName, findChildrenByName, findElementByName, findAttrByName)
 
 
 -- | Removes child elements that satisfy a given condition.

diff --git a/src/Text/Pandoc/Readers/OOXML/Shared.hs b/src/Text/Pandoc/Readers/OOXML/Shared.hs
@@ -0,0 +1,95 @@
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.Readers.OOXML.Shared
+   Copyright   : © 2025 Anton Antic
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Anton Antic <[email protected]>
+   Stability   : alpha
+   Portability : portable
+
+Shared utilities for Office Open XML (OOXML) readers (DOCX, PPTX).
+Provides common functions for ZIP archive handling, XML parsing,
+namespace management, and DrawingML parsing.
+-}
+module Text.Pandoc.Readers.OOXML.Shared
+  ( -- * Constants
+    emusPerInch
+  , emuToInches
+  , inchesToEmu
+    -- * Types
+  , NameSpaces
+  , elemName
+  , elemToNameSpaces
+  , isElem
+  , findChildByName
+  , findChildrenByName
+  , findElementByName
+  , findAttrByName
+  ) where
+
+import qualified Data.Map as M
+import qualified Data.Text as T
+import Data.Text (Text)
+import Text.Pandoc.XML.Light
+
+-- | Type alias for namespace mappings
+type NameSpaces = M.Map Text Text
+
+-- | English Metric Units per inch
+-- 1 inch = 914400 EMUs (used in OOXML for dimensions)
+emusPerInch :: Integer
+emusPerInch = 914400
+
+-- | Convert EMUs to inches
+emuToInches :: Integer -> Double
+emuToInches n = fromIntegral n / fromIntegral emusPerInch
+
+-- | Convert inches to EMUs
+inchesToEmu :: Double -> Integer
+inchesToEmu n = round (n * fromIntegral emusPerInch)
+
+-- | Extract namespace declarations from element attributes
+elemToNameSpaces :: Element -> NameSpaces
+elemToNameSpaces = foldr (\(Attr qn val) ->
+                            case qn of
+                              QName s _ (Just "xmlns") -> M.insert s val
+                              _ -> id) mempty . elAttribs
+
+-- | Create a qualified name from namespace map, prefix, and local name
+elemName :: NameSpaces -> Text -> Text -> QName
+elemName ns prefix name =
+  QName name
+        (M.lookup prefix ns)
+        (if T.null prefix then Nothing else Just prefix)
+
+-- | Check if element matches namespace prefix and local name
+isElem :: NameSpaces -> Text -> Text -> Element -> Bool
+isElem ns prefix name element =
+  let ns' = ns <> elemToNameSpaces element
+  in  qName (elName element) == name &&
+      qURI (elName element) == M.lookup prefix ns'
+
+-- | Find first child element matching namespace and name
+findChildByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
+findChildByName ns pref name el =
+  let ns' = ns <> elemToNameSpaces el
+  in  findChild (elemName ns' pref name) el
+
+-- | Find all children matching namespace and name
+findChildrenByName :: NameSpaces -> Text -> Text -> Element -> [Element]
+findChildrenByName ns pref name el =
+  let ns' = ns <> elemToNameSpaces el
+  in  findChildren (elemName ns' pref name) el
+
+-- | Find element anywhere in descendants matching namespace and name
+findElementByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
+findElementByName ns pref name el =
+  let ns' = ns <> elemToNameSpaces el
+  in  findElement (elemName ns' pref name) el
+
+-- | Find attribute value by namespace prefix and name
+findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text
+findAttrByName ns pref name el =
+  let ns' = ns <> elemToNameSpaces el
+  in  findAttr (elemName ns' pref name) el
diff --git a/src/Text/Pandoc/Readers/Pptx.hs b/src/Text/Pandoc/Readers/Pptx.hs
@@ -0,0 +1,43 @@
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.Readers.Pptx
+   Copyright   : © 2025 Anton Antic
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Anton Antic <[email protected]>
+   Stability   : alpha
+   Portability : portable
+
+Conversion of PPTX (PowerPoint) documents to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.Pptx (readPptx) where
+
+import qualified Data.ByteString.Lazy as B
+import qualified Data.Text as T
+import Codec.Archive.Zip (toArchiveOrFail)
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Class.PandocMonad (PandocMonad)
+import Text.Pandoc.Definition (Pandoc(..))
+import Text.Pandoc.Error (PandocError(..))
+import Text.Pandoc.Options (ReaderOptions)
+import Text.Pandoc.Readers.Pptx.Parse (archiveToPptx)
+import Text.Pandoc.Readers.Pptx.Slides (pptxToOutput)
+
+-- | Read PPTX file into Pandoc AST
+readPptx :: PandocMonad m => ReaderOptions -> B.ByteString -> m Pandoc
+readPptx opts bytes =
+  case toArchiveOrFail bytes of
+    Right archive ->
+      case archiveToPptx archive of
+        Right pptx -> do
+          -- Convert Pptx intermediate to Pandoc AST
+          (meta, blocks) <- pptxToOutput opts pptx
+          return $ Pandoc meta blocks
+
+        Left err ->
+          throwError $ PandocParseError $
+            "Failed to parse PPTX: " <> err
+
+    Left err ->
+      throwError $ PandocParseError $
+        "Failed to unpack PPTX archive: " <> T.pack err