Skip to content

Commit 1ae6e66

Browse files
split into tokenizer
1 parent 5e62e85 commit 1ae6e66

19 files changed

+1401
-1829
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@
33
/dist/
44
*.js
55
*.js.map
6-
*.d.ts
76
*.d.ts.map
87

98
# Keep TypeScript source files
109
!vitest.config.js
1110
!*.config.js
1211
!src/types.d.ts
12+
# Re-include hand-written ambient declarations. A leading "/" anchors the pattern
# to the directory holding this .gitignore; a "./" prefix never matches any path.
!/@types/**/*.d.ts
1313

1414
# Node.js
1515
node_modules/
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/**
 * Ambient type declarations for `@4dsas/doc_preprocessing/lib/preprocessor.js`.
 * NOTE(review): presumably needed because the package ships no typings of its own — confirm.
 */
declare module '@4dsas/doc_preprocessing/lib/preprocessor.js' {
2+
import { Settings } from '@4dsas/doc_preprocessing/lib/settings.js';
3+
4+
/** Class exported by the preprocessor module; only the members declared here are visible to TypeScript. */
export class Preprocessing {
5+
/** @param settings - `Settings` instance from the sibling `settings.js` module */
constructor(settings: Settings);
6+
/** Asynchronous; resolves with no value. NOTE(review): what gets collected is not visible from this declaration. */
collect(): Promise<void>;
7+
/** Typed as `any`, so callers get no compile-time checking of the returned object's shape. */
getSyntaxObject(): any;
8+
}
9+
}
10+
11+
/**
 * Ambient type declarations for `@4dsas/doc_preprocessing/lib/settings.js`.
 * NOTE(review): presumably needed because the package ships no typings of its own — confirm.
 */
declare module '@4dsas/doc_preprocessing/lib/settings.js' {
12+
/** String-keyed settings store; values are untyped (`any`) on both read and write. */
export class Settings {
13+
constructor();
14+
/** @param key - Setting name. NOTE(review): presumably one of the `SETTINGS_KEY` values — confirm. */
get(key: string): any;
15+
/**
 * @param key - Setting name
 * @param value - Value to store; not type-checked
 */
set(key: string, value: any): void;
16+
}
17+
18+
/**
 * Read-only table of named key strings.
 * NOTE(review): presumably the keys accepted by `Settings.get` / `Settings.set` — confirm against the package.
 */
export const SETTINGS_KEY: {
19+
readonly PREPROCESSING: string;
20+
readonly PATH: string;
21+
readonly CONFIG: string;
22+
readonly EXCLUDE_LIST: string;
23+
readonly VERBOSE: string;
24+
};
25+
}

README.md

Lines changed: 58 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,30 +7,50 @@ A comprehensive syntax parser and checker for 4D documentation, written in TypeS
77
## Features
88

99
- Parse 4D syntax strings with multiple variants
10+
- Tokenizer-based architecture for robust parsing
11+
- Multi-level warning system (structural vs type issues)
12+
- CLI support with configurable warning levels
1013
- Validate parameter types and names
1114
- Check for type mismatches and extra parameters
1215
- Support for optional parameters and spread syntax
1316
- Enhanced type validation with forward-slash and comma-separated types
1417
- Full TypeScript support with type definitions
1518

19+
## Architecture
20+
21+
The parser uses a modern 4-step architecture:
22+
23+
1. **Preprocessing**: Remove markdown formatting (`**bold**` → `bold`)
24+
2. **Tokenization**: Convert text into structured tokens
25+
3. **Malformation Checking**: Detect structural issues (missing braces, parentheses)
26+
4. **Parameter Checking**: Validate parameter types and detect type-related issues
27+
28+
### Warning Levels
29+
30+
- **Level 1**: High priority structural issues (missing parentheses, unmatched braces)
31+
- **Level 2**: Type-related issues (missing types, empty types after colons)
32+
1633
## Project Structure
1734

1835
```
1936
syntax-checker/
20-
├── src/ # TypeScript source files
21-
│ ├── parser.ts # Parser class implementation
22-
│ ├── checker.ts # SyntaxChecker class implementation
23-
│ └── types.d.ts # Type definitions
24-
├── tests/ # Test files
37+
├── src/ # TypeScript source files
38+
│ ├── parser.ts # Main parser orchestrator
39+
│ ├── tokenizer.ts # Tokenization logic
40+
│ ├── malformation-checker.ts # Structural validation
41+
│ ├── parameter-checker.ts # Parameter type validation
42+
│ ├── checker.ts # SyntaxChecker class implementation
43+
│ └── types.d.ts # Type definitions
44+
├── tests/ # Test files
2545
│ ├── parser.test.ts
2646
│ ├── checker.test.ts
2747
│ └── stress/
2848
│ └── stress.test.ts
29-
├── out/ # Compiled JavaScript output (generated)
30-
├── index.ts # Main library entry point
31-
├── check.ts # CLI script
32-
├── tsconfig.json # TypeScript configuration
33-
└── .gitignore # Git ignore file
49+
├── out/ # Compiled JavaScript output (generated)
50+
├── index.ts # Main library entry point
51+
├── check.ts # CLI script
52+
├── tsconfig.json # TypeScript configuration
53+
└── .gitignore # Git ignore file
3454
```
3555

3656
## Development
@@ -58,17 +78,42 @@ npm install @4d-docs/syntax-checker
5878

5979
## Usage
6080

81+
## CLI Usage
82+
83+
```bash
84+
# Check syntax with default settings (Level 1 warnings only)
85+
syntax-checker
86+
87+
# Check with all warnings (Level 1 + Level 2)
88+
syntax-checker --warning-level 2
89+
90+
# Check specific documentation folder
91+
syntax-checker ./docs --warning-level 2
92+
93+
# Short form
94+
syntax-checker -w 2
95+
```
96+
97+
### CLI Options
98+
99+
- `--warning-level, -w <1|2>`: Set warning level (default: 1)
100+
- `1`: Show only high priority warnings (structural issues)
101+
- `2`: Show all warnings (structural + type issues)
102+
- `--help, -h`: Show help message
103+
104+
## API Usage
105+
61106
### Basic Usage
62107

63108
```javascript
64-
import { Parser, SyntaxChecker } from '@4d-docs/syntax-checker';
109+
import { Parser, SyntaxChecker, WarningLevel } from '@4d-docs/syntax-checker';
65110

66111
// Parse syntax string
67112
const parser = new Parser();
68113
const variants = parser.parseSyntax('**VP SET TIME VALUE** ( *rangeObj* : Object ; *timeValue* : Text { ; *formatPattern* : Text } )');
69114

70-
// Check syntax
71-
const checker = new SyntaxChecker();
115+
// Check syntax with specific warning level
116+
const checker = new SyntaxChecker(WarningLevel.LEVEL_2);
72117
await checker.run();
73118
```
74119

src/malformation-checker.ts

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
import { Token, TokenType } from './tokenizer.js';
2+
import { WarningLevel, MalformationIssue } from './types.js';
3+
4+
/**
 * Detects structural problems in a tokenized syntax string.
 *
 * Every call to `checkMalformations` starts from an empty issue list, so a
 * single instance can be reused for any number of token streams.
 */
export class MalformationChecker {
  private issues: MalformationIssue[] = [];

  /**
   * Run every structural check over a token stream.
   * @param tokens - Tokens to inspect; whitespace tokens are ignored
   * @returns The issues found, in the order the individual checks reported them
   */
  checkMalformations(tokens: Token[]): MalformationIssue[] {
    this.issues = [];

    // Whitespace is dropped once here so that "previous"/"next" in the checks
    // below always refer to the neighbouring meaningful token.
    const significant = tokens.filter(({ type }) => type !== TokenType.WHITESPACE);

    this.checkBraceBalanceFast(significant);
    this.checkEmptyParametersFast(significant);
    this.checkUnexpectedTokensFast(significant);
    this.checkStructuralIssuesFast(significant);

    return this.issues;
  }

  /**
   * Report closing braces that have no opener and openers that are never closed.
   * @param nonWhitespaceTokens - Token stream with whitespace already removed
   */
  private checkBraceBalanceFast(nonWhitespaceTokens: Token[]): void {
    let openCount = 0;

    nonWhitespaceTokens.forEach(({ type }) => {
      switch (type) {
        case TokenType.OPEN_BRACE:
          openCount += 1;
          break;
        case TokenType.CLOSE_BRACE:
          if (openCount === 0) {
            // Nothing to close: report it and keep the counter at zero so one
            // stray brace does not skew the rest of the scan.
            this.addIssue('Extra closing brace (unmatched optional block closure)', WarningLevel.LEVEL_1);
          } else {
            openCount -= 1;
          }
          break;
        default:
          break;
      }
    });

    if (openCount > 0) {
      const plural = openCount > 1 ? 's' : '';
      this.addIssue(`Unclosed optional block (missing ${openCount} closing brace${plural})`, WarningLevel.LEVEL_1);
    }
  }

  /**
   * Report semicolons that delimit an empty parameter: two in a row, or one
   * that is the very first token.
   * @param nonWhitespaceTokens - Token stream with whitespace already removed
   */
  private checkEmptyParametersFast(nonWhitespaceTokens: Token[]): void {
    nonWhitespaceTokens.forEach((current, index) => {
      if (current.type !== TokenType.SEMICOLON) {
        return;
      }

      const following = nonWhitespaceTokens[index + 1];
      if (following?.type === TokenType.SEMICOLON) {
        this.addIssue('Empty parameter found (double semicolon)', WarningLevel.LEVEL_1);
      }

      // No predecessor exists exactly when this is the first token.
      if (index === 0) {
        this.addIssue('Empty parameter found (semicolon at start)', WarningLevel.LEVEL_1);
      }
    });
  }

  /**
   * Report colons that do not follow a parameter name or spread token, and
   * colons immediately followed by another colon.
   * @param nonWhitespaceTokens - Token stream with whitespace already removed
   */
  private checkUnexpectedTokensFast(nonWhitespaceTokens: Token[]): void {
    nonWhitespaceTokens.forEach((current, index) => {
      if (current.type !== TokenType.COLON) {
        return;
      }

      const before = nonWhitespaceTokens[index - 1];
      const after = nonWhitespaceTokens[index + 1];

      const followsName =
        before?.type === TokenType.PARAMETER_NAME || before?.type === TokenType.SPREAD;
      if (!followsName) {
        this.addIssue('Unexpected colon (missing parameter name)', WarningLevel.LEVEL_1);
      }

      if (after?.type === TokenType.COLON) {
        this.addIssue('Double colon found in parameter definition', WarningLevel.LEVEL_1);
      }
    });
  }

  /**
   * Report parameter names that contain an asterisk and type tokens that
   * contain a forward slash.
   * @param nonWhitespaceTokens - Token stream with whitespace already removed
   */
  private checkStructuralIssuesFast(nonWhitespaceTokens: Token[]): void {
    for (const { type, value } of nonWhitespaceTokens) {
      // An asterisk inside a name is reported as likely leftover markup.
      if (type === TokenType.PARAMETER_NAME && value.includes('*')) {
        this.addIssue(`Parameter name '${value}' contains asterisks (likely malformed markup)`, WarningLevel.LEVEL_1);
      }

      if (type === TokenType.TYPE && value.includes('/')) {
        this.addIssue(`Type definition '${value}' contains invalid forward slash characters`, WarningLevel.LEVEL_1);
      }
    }
  }

  /**
   * Append one issue to the list for the current run.
   * @param message - Human-readable description of the problem
   * @param level - Warning level attached to the issue
   */
  private addIssue(message: string, level: WarningLevel): void {
    const issue: MalformationIssue = { message, level };
    this.issues.push(issue);
  }
}

0 commit comments

Comments
 (0)