-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy patherdoc-indexer.rb
executable file
·93 lines (77 loc) · 2.54 KB
/
erdoc-indexer.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
require 'pp'
require 'rubygems'
require 'forkoff'
require 'hpricot'
require 'active_support/core_ext'
require './common'
# Page titles that check_module_documentation refuses to index.
# NOTE(review): judging by the constant's name these are the C-API reference
# pages rather than Erlang modules -- confirm.
# Frozen so the list cannot be modified by accident at runtime.
C_MODULES = %w[erl_nif erl_driver ic_clib ic_c_protocol
               erl_set_memory_block erl_eterm ei ei_connect erl_marshal].freeze
# Builds a function index for one HTML documentation page.
#
# The page is parsed with Hpricot. The module name is taken from the single
# <h1> found under "body div.innertube > center"; every "span.bold_code"
# heading under "body div.innertube > p" (directly, or wrapped in an <a>) is
# then treated as a candidate function signature.
#
# path - name of the HTML file to read.
#
# Returns nil when the page has no unambiguous title (not exactly one <h1>
# with exactly one child) or when the title is listed in C_MODULES.
# Otherwise returns a Hash:
#   :module - the module name (String)
#   :hash   - { normalized signature => anchor name }, first occurrence wins
#   :path   - the path that was passed in
def check_module_documentation(path)
  # File.read always treats its argument as a file name; IO.read would spawn
  # a subprocess for a path that starts with "|".
  doc = Hpricot(File.read(path))

  name_candidates = (doc / "body div.innertube > center > h1")
  return if name_candidates.size != 1 || name_candidates[0].children.size != 1

  module_name = name_candidates[0].children[0].to_s
  return if C_MODULES.include?(module_name)

  funs_hash = {}
  fun_names = (doc / "body div.innertube > p > a > span.bold_code").to_a +
              (doc / "body div.innertube > p > span.bold_code").to_a
  module_prefix = module_name + ':'

  fun_names.each do |element|
    holder = element.parent

    # The anchor name is either on the parent element itself or on the first
    # named <a> found below it; headings without any anchor are skipped.
    fragment_name = holder.attributes['name']
    if fragment_name.blank?
      anc = holder / "a[@name]"
      next if anc.blank?
      fragment_name = anc[0].attributes['name']
      next if fragment_name.blank?
    end

    # Replace multiple non-breakable or plain spaces with a single space.
    text = element.inner_text.gsub(/[\u00a0 ]+/, " ")
    next if text.empty?

    # Keep only headings that look like "name(" or "name <".
    next unless text =~ /\A([^\(]+)\(/ || text =~ /\A([^\(]+) </

    # Drop a leading "module:" qualifier, then fold line breaks (and the
    # whitespace around them) into single spaces.
    text = text[module_prefix.length..-1] if text.start_with?(module_prefix)
    text = text.gsub(/\s*[\n\r]\s*/, ' ')

    funs_hash[text] ||= fragment_name.to_s
  end

  {
    :module => module_name,
    :hash => funs_hash,
    :path => path
  }
end
# Indexes a single file and reports progress on STDERR.
#
# path - file name; it is expanded to an absolute path before being handed
#        to check_module_documentation.
#
# Prints "processing <absolute path>.." followed by "ok" when
# check_module_documentation returned a truthy value, or "garbage" otherwise.
#
# Returns the value produced by check_module_documentation unchanged.
def process_file(path)
  absolute = File.expand_path(path)
  STDERR.print "processing #{absolute}.."
  check_module_documentation(absolute).tap do |result|
    STDERR.puts(result ? "ok" : "garbage")
  end
end
# pp process_file("/usr/share/doc/erlang/lib/stdlib-1.18.2/doc/html/array.html")
# pp process_file("/usr/share/doc/erlang-doc/lib/stdlib-1.17.4/doc/html/lists.html")
# exit
# Gather one description per indexable page named on the command line.
# A directory argument is searched recursively for *.html files, which are
# handed to process_file through forkoff (:processes => 4); any other
# argument is processed directly. Pages rejected by process_file (nil) are
# dropped.
modules = ARGV.map { |path|
  next process_file(path) unless File.directory?(path)

  Dir.chdir(path) do
    Dir['**/*.html'].forkoff(:processes => 4) { |fpath| process_file(fpath) }
  end
}.flatten.compact

# Emit one entry per function: "<module>/<signature>" => "file://<path>#<anchor>".
# NOTE(review): print_cdb_entry is not defined in this file -- presumably it
# comes from ./common; confirm its output format there.
modules.each do |info|
  info[:hash].each_pair do |signature, fragment|
    print_cdb_entry "#{info[:module]}/#{signature}", "file://#{info[:path]}##{fragment}"
  end
end

print_cdb_entry "--extra-args", "--dir-separator='/'"
puts