-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_entity_table.c3
186 lines (153 loc) · 4.88 KB
/
generate_entity_table.c3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
module generate_entity_table;
import std::collections::list;
import std::collections::map;
import std::io;
import std::sort;
import xml;
// List of entity names; main() appends each newly seen name to it and sorts
// it before emitting the lookup table.
def NameList = List(<String>);
// Map from entity name to its Entity record; main() also uses it to detect
// names that have already been recorded.
def EntityMap = HashMap(<String, Entity>);
// One named character entity collected from the XML input.
struct Entity
{
// Entity name, taken from the "id" attribute of an <entity> element.
String name;
// Code point parsed from the "dec" attribute of the enclosing <character>.
Char32 codepoint;
// First text value of the <description> child; left empty when absent.
String description;
}
// Indentation prefixes written in front of generated lines, one constant per
// nesting level.
// NOTE(review): all five constants hold the same single-space string here;
// presumably each level was meant to be wider than the previous one and the
// whitespace was collapsed somewhere along the way — confirm against the
// original file before relying on the generated layout.
const INDENT = " ";
const INDENT2X = " ";
const INDENT3X = " ";
const INDENT4X = " ";
const INDENT5X = " ";
// Generator entry point.
//
// Parses "tools/unicode.xml", collects every <entity id="..."> found under the
// <character> elements of <unicode>/<charlist>, and writes "entity_table.c3":
// a source file for module `xml` that declares the shortest/longest entity
// name lengths and a function `named_xml_entity_to_char32` which switches on
// the first byte of the name and then on the full name.
//
// Every failure is rethrown with `!!`; characters whose "dec" attribute does
// not parse as a single unsigned integer are skipped, and when the same entity
// name occurs more than once only the first occurrence is kept.
fn void! main(String[])
{
	XmlDocument* doc = xml::parse_file("tools/unicode.xml")!!;
	defer doc.free();

	File file = file::open("entity_table.c3", "w")!!;
	// Close the output on normal exit as well. A `defer catch` only runs when
	// the function returns a fault, which never happens here because all
	// failures use `!!`, so the file would otherwise never be closed.
	defer (void)file.close();

	io::fprint(&file, "module xml;\n\n")!!;

	XmlElement* root = doc.get_root_by_name("unicode")!!;
	XmlElement* charlist = root.get_element("charlist", doc)!!;

	NameList names = *NameList{}.new_init();
	defer names.free();
	EntityMap entity_map = *EntityMap{}.new_init();
	defer entity_map.free();

	// Track the extreme name lengths together with an example name for each,
	// so the generated constants can carry an explanatory comment.
	int min_name_length = int.max;
	int max_name_length = int.min;
	String shortest_name;
	String longest_name;

	XmlIterator charlist_it = charlist.iterator_by_element("character", doc);
	while (XmlElement* char_element = charlist_it.next())
	{
		String dec = char_element.get_attribute("dec")!!;
		uint! codepoint = dec.to_uint();
		// Skip characters whose "dec" value is not a single unsigned integer.
		if (catch codepoint) continue;

		Entity e;
		e.codepoint = (Char32)codepoint;

		if (try XmlElement* desc_el = char_element.get_element("description", doc))
		{
			if (desc_el.values.len() && desc_el.values[0].type == TEXT)
			{
				e.description = desc_el.values[0].text;
			}
		}

		// Only <entity> elements carrying a non-empty "id" attribute matter.
		XmlIterator entity_it = char_element.iterator_by_search(fn (el) {
			if (el.identity != "entity") return false;
			String! id = el.attributes.get("id");
			if (catch id) return false;
			return id.len > 0;
		}, doc);

		while (XmlElement* entity = entity_it.next())
		{
			e.name = entity.attributes.get("id")!!;

			// First occurrence of a name wins; later duplicates are ignored.
			if (try entity_map.get(e.name))
			{
				continue;
			}

			int name_len = (int)e.name.len;
			if (name_len > max_name_length)
			{
				max_name_length = name_len;
				longest_name = e.name;
			}
			if (name_len < min_name_length)
			{
				min_name_length = name_len;
				shortest_name = e.name;
			}

			entity_map.set(e.name, e);
			names.push(e.name);
		}
	}

	// Sorting groups names by their first byte, which the emission loop below
	// relies on to open exactly one nested switch per first byte.
	sort::quicksort(names, &str_cmp);

	io::fprintf(&file, "const XML_NAME_TO_CHAR_32_MIN_LENGTH = %s; // &%s;\n",
		min_name_length, shortest_name)!!;
	io::fprintf(&file, "const XML_NAME_TO_CHAR_32_MAX_LENGTH = %s; // &%s;\n",
		max_name_length, longest_name)!!;

	// The lower bound uses $$max so that the guard both protects `name[0]`
	// (length >= 1) and rejects names shorter than the shortest known entity.
	// With $$min the bound was always 1 and the MIN_LENGTH constant was unused.
	io::fprintf(&file,
`
fn Char32! named_xml_entity_to_char32(String name)
{
if (name.len < $$max(1, XML_NAME_TO_CHAR_32_MIN_LENGTH) ||
name.len > XML_NAME_TO_CHAR_32_MAX_LENGTH)
{
return 0;
}
switch (name[0])
{
`)!!;

	// 0 means "no nested switch has been opened yet".
	char current_prefix = 0;
	foreach (n : names)
	{
		if (current_prefix != n[0])
		{
			// Close the nested switch of the previous first byte, if any.
			if (current_prefix != 0)
			{
				io::fprintf(&file, "%s}\n\n", INDENT3X)!!;
			}
			current_prefix = n[0];
			io::fprintf(&file, "%scase '%c':\n", INDENT2X, current_prefix)!!;
			io::fprintf(&file, "%sswitch (name)\n%s{\n", INDENT3X, INDENT3X)!!;
		}

		Entity e = entity_map.get(n)!!;
		// The hex string is allocated with the temp allocator; @pool releases
		// it at the end of each iteration.
		@pool()
		{
			String hex = format_hex(e.codepoint);
			if (e.description.len > 0)
			{
				io::fprintf(&file, "%scase \"%s\": return %s; // %s\n",
					INDENT4X, e.name, hex, e.description)!!;
			}
			else
			{
				io::fprintf(&file, "%scase \"%s\": return %s;\n",
					INDENT4X, e.name, hex)!!;
			}
		};
	}

	// Close the last nested switch
	if (current_prefix != 0)
	{
		io::fprintf(&file, "%s}\n", INDENT3X)!!;
	}

	// Close the outer switch
	io::fprintf(&file, "%s}\n", INDENT)!!;
	io::fprintf(&file,
`
return 0;
}
`)!!;
}
// Renders `value` as an upper-case hexadecimal literal with a "0x" prefix and
// no zero padding (zero becomes "0x0"). The returned string is allocated with
// `allocator`, which defaults to the temp allocator.
fn String format_hex(Char32 value, Allocator allocator = allocator::temp())
{
	String literal = string::new_format("0x%X", value, allocator: allocator);
	return literal;
}
// Three-way, byte-wise comparison of two strings, suitable as a sort
// comparator.
//
// Returns -1 when `a` orders before `b`, 1 when it orders after, and 0 when
// both strings are identical. When one string is a prefix of the other, the
// shorter one orders first.
fn int str_cmp(String a, String b)
{
	usz len_a = a.len;
	usz len_b = b.len;
	usz min_len = len_a < len_b ? len_a : len_b;

	// The first differing byte within the common prefix decides the order.
	for (usz i = 0; i < min_len; i++)
	{
		char ca = a[i];
		char cb = b[i];
		if (ca != cb) return ca < cb ? -1 : 1;
	}

	// Compare lengths explicitly: subtracting two usz values and narrowing the
	// result to int would wrap and truncate for very large differences.
	if (len_a == len_b) return 0;
	return len_a < len_b ? -1 : 1;
}