diff --git a/README.md b/README.md index 1519a58..731d464 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ ## Features - Converts Confluence pages to Markdown format. -- Uses the Atlassian API to export individual pages, pages including children, and whole spaces. +- Uses the Atlassian API to export individual pages, pages including children, folders with subfolders, and whole spaces. - Supports various Confluence elements such as headings, paragraphs, lists, tables, and more. - Retains formatting such as bold, italic, and underline. - Converts Confluence macros to equivalent Markdown syntax where possible. @@ -50,7 +50,7 @@ pip install confluence-markdown-exporter ### 2. Exporting -Run the exporter with the desired Confluence page ID or space key. Execute the console application by typing `confluence-markdown-exporter` and one of the commands `pages`, `pages-with-descendants`, `spaces`, `all-spaces` or `config`. If a command is unclear, you can always add `--help` to get additional information. +Run the exporter with the desired Confluence page ID, folder ID, or space key. Execute the console application by typing `confluence-markdown-exporter` and one of the commands `pages`, `pages-with-descendants`, `folders`, `spaces`, `all-spaces` or `config`. If a command is unclear, you can always add `--help` to get additional information. > [!TIP] > Instead of `confluence-markdown-exporter` you can also use the shorthand `cf-export`. @@ -91,7 +91,23 @@ Export all Confluence pages of a single Space: confluence-markdown-exporter spaces ``` -#### 2.3. Export all Spaces +#### 2.4. Export Folder + +Export all Confluence pages within a folder and all its subfolders by folder ID: + +```sh +confluence-markdown-exporter folders <folder-id> +``` + +or by URL: + +```sh +confluence-markdown-exporter folders <folder-url> +``` + +This command **recursively exports all pages** from the specified folder and any nested subfolders within it. 
class Folder(BaseModel):
    """A Confluence folder whose pages can be collected and exported.

    Folder metadata and children are read through ``get_folder_by_id`` and
    ``get_folder_children``.
    """

    id: str
    title: str
    space: Space

    @property
    def pages(self) -> list[int]:
        """Get all page IDs within this folder and its subfolders recursively."""
        return self._get_all_pages()

    def _get_all_pages(self) -> list[int]:
        """Recursively collect all page IDs from this folder and subfolders.

        Children of type ``page`` contribute their ID; children of type
        ``folder`` are loaded and searched in turn. Children without an ID or
        of any other type are ignored. A subfolder that cannot be loaded is
        logged and skipped so the remaining content is still exported.

        Returns:
            Page IDs in the order the children were returned.
        """
        # NOTE(review): only the entries returned by get_folder_children are
        # followed; child pages nested below a page are not descended into
        # here - confirm whether export_pages covers them.
        page_ids: list[int] = []

        for child in get_folder_children(self.id):
            child_id = child.get("id")
            if not child_id:
                continue

            child_type = child.get("type")
            if child_type == "page":
                page_ids.append(int(child_id))
            elif child_type == "folder":
                # from_id() re-raises ApiError/HTTPError as ValueError, so
                # ValueError must be caught as well; otherwise one
                # inaccessible subfolder aborts the whole export.
                try:
                    page_ids.extend(Folder.from_id(child_id).pages)
                except (ApiError, HTTPError, ValueError) as e:
                    logger.warning(f"Could not access subfolder {child_id}: {e}")

        return page_ids

    def export(self) -> None:
        """Export all pages within this folder.

        A warning is logged when the folder contains no pages; ``export_pages``
        is still called with the (possibly empty) list.
        """
        page_ids = self.pages
        if not page_ids:
            logger.warning(f"No pages found in folder '{self.title}' (ID: {self.id})")
        export_pages(page_ids)

    @classmethod
    def from_json(cls, data: JsonResponse) -> "Folder":
        """Create a Folder instance from API JSON response.

        Args:
            data: Folder payload; the keys ``id``, ``title`` and ``spaceId``
                are read, each falling back to an empty value when missing.

        Returns:
            The folder, with an empty placeholder ``Space`` when the space
            could not be resolved.
        """
        space_key = ""
        space_id = data.get("spaceId")
        if space_id:
            # NOTE(review): the folder's ``spaceId`` is handed to
            # ``confluence.get_space`` - confirm that call accepts this
            # identifier and not only a space key.
            try:
                space_data = cast(
                    "JsonResponse", confluence.get_space(space_id, expand="homepage")
                )
                space_key = space_data.get("key", "")
            except (ApiError, HTTPError):
                logger.warning(f"Could not fetch space for folder {data.get('id', '')}")

        if space_key:
            space = Space.from_key(space_key)
        else:
            space = Space(key="", name="", description="", homepage=0)

        return cls(
            id=data.get("id", ""),
            title=data.get("title", ""),
            space=space,
        )

    @classmethod
    @functools.lru_cache(maxsize=100)
    def from_id(cls, folder_id: str) -> "Folder":
        """Fetch a folder by ID and create a Folder instance.

        Results are memoized per folder ID (up to 100 entries).

        Raises:
            ValueError: If the folder (or its space) cannot be fetched because
                of an ``ApiError`` or ``HTTPError``.
        """
        try:
            folder_data = get_folder_by_id(folder_id)
            return cls.from_json(folder_data)
        except (ApiError, HTTPError) as e:
            msg = f"Could not access folder with ID {folder_id}: {e}"
            raise ValueError(msg) from e

    @classmethod
    def from_url(cls, folder_url: str) -> "Folder":
        """Retrieve a Folder object given a Confluence folder URL.

        Supports URL patterns like:
        - https://company.atlassian.net/wiki/spaces/SPACE/folders/123456
        - https://company.atlassian.net/wiki/spaces/SPACE/pages/folders/123456

        If the URL's host is not part of the configured Confluence URL, the
        setting is updated and the module-level client is re-created.

        Raises:
            ValueError: If no folder ID can be extracted from the URL.
        """
        url = urllib.parse.urlparse(folder_url)
        hostname = url.hostname
        if hostname and hostname not in str(settings.auth.confluence.url):
            global confluence  # noqa: PLW0603
            set_setting("auth.confluence.url", f"{url.scheme}://{hostname}/")
            confluence = get_confluence_instance()  # Refresh instance with new URL

        path = url.path.rstrip("/")

        # Tried in order, most specific first; the last one is a generic fallback.
        patterns = (
            r"/wiki/spaces/[^/]+/folders/(\d+)",
            r"/wiki/spaces/[^/]+/pages/folders/(\d+)",
            r"/wiki/.+?/folders/(\d+)",
        )
        for pattern in patterns:
            if match := re.search(pattern, path):
                return Folder.from_id(match.group(1))

        msg = f"Could not parse folder URL {folder_url}."
        raise ValueError(msg)
def get_folder_by_id(folder_id: str) -> JsonResponse:
    """Fetch folder metadata using Confluence REST API v2.

    Args:
        folder_id: The folder ID.

    Returns:
        JSON response containing folder metadata.

    Raises:
        ApiNotFoundError: If the API returns an empty response.

    Errors raised by ``confluence.get`` itself propagate unchanged.
    """
    url = f"api/v2/folders/{folder_id}"
    response = confluence.get(url)
    if not response:
        msg = f"Folder with ID {folder_id} not found or not accessible"
        raise ApiNotFoundError(msg)
    return cast("JsonResponse", response)


def _cursor_from_next_link(next_url: str | None) -> str | None:
    """Return the URL-decoded ``cursor`` query value of a "next" link, if any."""
    if not next_url:
        return None
    query = urllib.parse.urlparse(next_url).query
    # parse_qs percent-decodes the value and drops blank ones, so the cursor
    # is not encoded twice when it is sent back as a request parameter.
    values = urllib.parse.parse_qs(query).get("cursor")
    return values[0] if values else None


def get_folder_children(folder_id: str) -> list[JsonResponse]:
    """Fetch all children (pages and subfolders) from a folder with pagination.

    Pages of up to 100 results are requested until a response is empty, has no
    results, or carries no usable ``cursor`` in ``_links.next``. A 404 ends
    the listing with a warning and returns what was collected so far; any
    other ``HTTPError`` is re-raised.

    Args:
        folder_id: The folder ID.

    Returns:
        List of child objects (pages and folders) with metadata.
    """
    url = f"api/v2/folders/{folder_id}/children"
    limit = 100
    all_children: list[JsonResponse] = []
    cursor: str | None = None

    while True:
        # A fresh dict per request keeps each call's parameters independent.
        params: dict[str, int | str] = {"limit": limit}
        if cursor:
            params["cursor"] = cursor

        try:
            response = confluence.get(url, params=params)
        except HTTPError as e:
            not_found = e.response is not None and e.response.status_code == 404  # noqa: PLR2004
            if not_found:
                logger.warning(
                    f"Folder with ID {folder_id} not found (404) when fetching children."
                )
                break
            raise

        if not response:
            break

        children = response.get("results", [])
        if not children:
            break
        all_children.extend(children)

        # Stop when there is no next page or its cursor is missing/empty;
        # continuing without a cursor would re-request the first page forever.
        cursor = _cursor_from_next_link(response.get("_links", {}).get("next"))
        if not cursor:
            break

    return all_children
@app.command(help="Export all Confluence pages within one or more folders to Markdown.")
def folders(
    folders: Annotated[list[str], typer.Argument(help="Folder ID(s) or URL(s)")],
    output_path: Annotated[
        Path | None,
        typer.Option(
            help="Directory to write exported Markdown files to. Overrides config if set."
        ),
    ] = None,
) -> None:
    """Export every page of each given folder, addressed by ID or by URL."""
    # Imported lazily, inside the command, as in the original.
    from confluence_markdown_exporter.confluence import Folder

    url_prefixes = ("http://", "https://")

    with measure(f"Export folders {', '.join(folders)}"):
        for reference in folders:
            override_output_path_config(output_path)
            # Anything that looks like a URL is parsed; everything else is an ID.
            if reference.startswith(url_prefixes):
                target = Folder.from_url(reference)
            else:
                target = Folder.from_id(reference)
            target.export()
def pytest_configure(config: pytest.Config) -> None:  # noqa: ARG001
    """Import the confluence module once with the API client factory mocked.

    ``confluence.py`` creates a module-level client when it is imported, so
    ``get_confluence_instance`` is replaced with a mock for the duration of
    that first import to avoid authentication during test collection.
    """
    # Using the patch as a context manager guarantees it is undone even when
    # the import fails, so a broken import cannot leave the mock active for
    # the rest of the session. Individual tests can then mock as needed.
    with patch(
        "confluence_markdown_exporter.api_clients.get_confluence_instance",
        return_value=MagicMock(),
    ):
        import confluence_markdown_exporter.confluence  # noqa: F401
class TestGetFolderChildren:
    """Test cases for get_folder_children function."""

    @patch("confluence_markdown_exporter.confluence.confluence")
    def test_fetch_children_single_page(self, mock_confluence: MagicMock) -> None:
        """Test fetching children with single page result."""
        mock_response = {
            "results": [
                {"id": "111", "type": "page", "title": "Test Page 1"},
                {"id": "222", "type": "page", "title": "Test Page 2"},
            ],
            "_links": {},
        }
        mock_confluence.get.return_value = mock_response

        result = get_folder_children("123456")

        assert len(result) == 2
        assert result[0]["id"] == "111"
        assert result[1]["id"] == "222"
        mock_confluence.get.assert_called_once()

    @patch("confluence_markdown_exporter.confluence.confluence")
    def test_fetch_children_with_pagination(self, mock_confluence: MagicMock) -> None:
        """Test fetching children with pagination."""
        # First page
        mock_response_1 = {
            "results": [{"id": "111", "type": "page", "title": "Page 1"}],
            "_links": {"next": "/api/v2/folders/123/children?cursor=abc123"},
        }
        # Second page
        mock_response_2 = {
            "results": [{"id": "222", "type": "page", "title": "Page 2"}],
            "_links": {},
        }

        mock_confluence.get.side_effect = [mock_response_1, mock_response_2]

        result = get_folder_children("123456")

        assert len(result) == 2
        assert result[0]["id"] == "111"
        assert result[1]["id"] == "222"
        assert mock_confluence.get.call_count == 2

        # The cursor from the first response must be forwarded on the second
        # request only; without this check the test passes even if pagination
        # parameters are dropped.
        first_call, second_call = mock_confluence.get.call_args_list
        assert "cursor" not in first_call.kwargs["params"]
        assert second_call.kwargs["params"]["cursor"] == "abc123"

    @patch("confluence_markdown_exporter.confluence.confluence")
    def test_fetch_children_empty_folder(self, mock_confluence: MagicMock) -> None:
        """Test fetching children from empty folder."""
        mock_response = {"results": [], "_links": {}}
        mock_confluence.get.return_value = mock_response

        result = get_folder_children("123456")

        assert len(result) == 0

    @patch("confluence_markdown_exporter.confluence.confluence")
    def test_fetch_children_http_error_404(self, mock_confluence: MagicMock) -> None:
        """Test handling 404 error when fetching children."""
        from requests import HTTPError

        mock_response = MagicMock()
        mock_response.status_code = 404
        mock_confluence.get.side_effect = HTTPError(response=mock_response)

        result = get_folder_children("invalid_id")

        assert len(result) == 0
class TestFolderClass:
    """Test cases for Folder class."""

    @pytest.fixture(autouse=True)
    def _clear_from_id_cache(self, request: pytest.FixtureRequest) -> None:
        """Reset the ``Folder.from_id`` cache around every test.

        ``from_id`` is memoized with ``lru_cache``; a folder cached by one test
        (built from mocks) would otherwise be served to later tests and break
        call-count assertions.
        """
        Folder.from_id.cache_clear()
        request.addfinalizer(Folder.from_id.cache_clear)

    @patch("confluence_markdown_exporter.confluence.Space.from_key")
    @patch("confluence_markdown_exporter.confluence.confluence")
    def test_from_json(self, mock_confluence: MagicMock, mock_space_from_key: MagicMock) -> None:
        """Test creating Folder from JSON."""
        from confluence_markdown_exporter.confluence import Space

        mock_space = Space(key="TESTSPACE", name="Test Space", description="", homepage=0)
        mock_space_from_key.return_value = mock_space
        mock_confluence.get_space.return_value = {"key": "TESTSPACE", "name": "Test Space"}

        folder_data = {
            "id": "123456",
            "title": "Test Folder",
            "type": "folder",
            "spaceId": "TESTSPACE",
        }

        folder = Folder.from_json(folder_data)

        assert folder.id == "123456"
        assert folder.title == "Test Folder"
        # The space must be resolved via the key returned by the space lookup.
        assert folder.space == mock_space
        mock_confluence.get_space.assert_called_once_with("TESTSPACE", expand="homepage")
        mock_space_from_key.assert_called_once_with("TESTSPACE")

    @patch("confluence_markdown_exporter.confluence.Space.from_key")
    @patch("confluence_markdown_exporter.confluence.confluence")
    @patch("confluence_markdown_exporter.confluence.get_folder_by_id")
    def test_from_id(
        self,
        mock_get_folder: MagicMock,
        mock_confluence: MagicMock,
        mock_space_from_key: MagicMock,
    ) -> None:
        """Test creating Folder from ID."""
        from confluence_markdown_exporter.confluence import Space

        mock_space = Space(key="TESTSPACE", name="Test Space", description="", homepage=0)
        mock_space_from_key.return_value = mock_space

        mock_get_folder.return_value = {
            "id": "123456",
            "title": "Test Folder",
            "type": "folder",
            "spaceId": "TESTSPACE",
        }

        mock_confluence.get_space.return_value = {"key": "TESTSPACE", "name": "Test Space"}

        folder = Folder.from_id("123456")

        assert folder.id == "123456"
        assert folder.title == "Test Folder"
        mock_get_folder.assert_called_once_with("123456")

    @patch("confluence_markdown_exporter.confluence.Folder.from_id")
    @patch("confluence_markdown_exporter.confluence.settings")
    def test_from_url_spaces_folders_pattern(
        self, mock_settings: MagicMock, mock_from_id: MagicMock
    ) -> None:
        """Test creating Folder from URL with /spaces/SPACE/folders/ pattern."""
        mock_settings.auth.confluence.url = "https://company.atlassian.net/"

        mock_folder = MagicMock()
        mock_from_id.return_value = mock_folder

        url = "https://company.atlassian.net/wiki/spaces/MYSPACE/folders/123456"
        result = Folder.from_url(url)

        mock_from_id.assert_called_once_with("123456")
        assert result == mock_folder

    @patch("confluence_markdown_exporter.confluence.Folder.from_id")
    @patch("confluence_markdown_exporter.confluence.settings")
    def test_from_url_pages_folders_pattern(
        self, mock_settings: MagicMock, mock_from_id: MagicMock
    ) -> None:
        """Test creating Folder from URL with /pages/folders/ pattern."""
        mock_settings.auth.confluence.url = "https://company.atlassian.net/"

        mock_folder = MagicMock()
        mock_from_id.return_value = mock_folder

        url = "https://company.atlassian.net/wiki/spaces/MYSPACE/pages/folders/789012"
        result = Folder.from_url(url)

        mock_from_id.assert_called_once_with("789012")
        assert result == mock_folder

    @patch("confluence_markdown_exporter.confluence.Folder.from_id")
    @patch("confluence_markdown_exporter.confluence.settings")
    def test_from_url_generic_folders_pattern(
        self, mock_settings: MagicMock, mock_from_id: MagicMock
    ) -> None:
        """Test creating Folder from URL with generic /folders/ pattern."""
        mock_settings.auth.confluence.url = "https://company.atlassian.net/"

        mock_folder = MagicMock()
        mock_from_id.return_value = mock_folder

        url = "https://company.atlassian.net/wiki/x/folders/345678"
        result = Folder.from_url(url)

        mock_from_id.assert_called_once_with("345678")
        assert result == mock_folder

    @patch("confluence_markdown_exporter.confluence.settings")
    def test_from_url_invalid_url(self, mock_settings: MagicMock) -> None:
        """Test that invalid folder URL raises ValueError."""
        mock_settings.auth.confluence.url = "https://company.atlassian.net/"

        with pytest.raises(ValueError, match="Could not parse folder URL"):
            Folder.from_url("https://company.atlassian.net/wiki/invalid/path")

    @patch("confluence_markdown_exporter.confluence.get_folder_children")
    def test_pages_property_with_pages(self, mock_get_children: MagicMock) -> None:
        """Test pages property returns page IDs."""
        from confluence_markdown_exporter.confluence import Space

        mock_space = Space(key="TEST", name="Test", description="", homepage=0)

        mock_get_children.return_value = [
            {"id": "111", "type": "page"},
            {"id": "222", "type": "page"},
        ]

        folder = Folder(id="123", title="Test", space=mock_space)
        page_ids = folder.pages

        assert len(page_ids) == 2
        assert 111 in page_ids
        assert 222 in page_ids

    @patch("confluence_markdown_exporter.confluence.get_folder_children")
    def test_pages_property_empty_folder(self, mock_get_children: MagicMock) -> None:
        """Test pages property with empty folder."""
        from confluence_markdown_exporter.confluence import Space

        mock_space = Space(key="TEST", name="Test", description="", homepage=0)

        mock_get_children.return_value = []

        folder = Folder(id="123", title="Test", space=mock_space)
        page_ids = folder.pages

        assert len(page_ids) == 0

    @patch("confluence_markdown_exporter.confluence.export_pages")
    @patch("confluence_markdown_exporter.confluence.get_folder_children")
    def test_export_with_pages(
        self,
        mock_get_children: MagicMock,
        mock_export_pages: MagicMock,
    ) -> None:
        """Test exporting folder with pages."""
        from confluence_markdown_exporter.confluence import Space

        mock_space = Space(key="TEST", name="Test", description="", homepage=0)

        mock_get_children.return_value = [
            {"id": "111", "type": "page"},
            {"id": "222", "type": "page"},
        ]

        folder = Folder(id="123", title="Test", space=mock_space)
        folder.export()

        mock_export_pages.assert_called_once()
        called_page_ids = mock_export_pages.call_args[0][0]
        assert len(called_page_ids) == 2
        assert 111 in called_page_ids
        assert 222 in called_page_ids

    @patch("confluence_markdown_exporter.confluence.export_pages")
    @patch("confluence_markdown_exporter.confluence.get_folder_children")
    def test_export_empty_folder(
        self,
        mock_get_children: MagicMock,
        mock_export_pages: MagicMock,
    ) -> None:
        """Test exporting empty folder logs warning."""
        from confluence_markdown_exporter.confluence import Space

        mock_space = Space(key="TEST", name="Test", description="", homepage=0)

        mock_get_children.return_value = []

        folder = Folder(id="123", title="Test Folder", space=mock_space)

        with patch("confluence_markdown_exporter.confluence.logger") as mock_logger:
            folder.export()
            mock_logger.warning.assert_called_once()
            assert "No pages found" in mock_logger.warning.call_args[0][0]

        mock_export_pages.assert_called_once_with([])