#!/pkg/all/bin/ruby
# $Id: namazu.rb,v 1.1.1.1 2009/03/29 20:53:30 k-akashi Exp $
# Reads the stored value of one field for a given document id from a Namazu
# index directory.
#
# Two files are consulted:
#   NMZ.field.<fieldname>.i - table of 4-byte big-endian offsets, indexed by
#                             document id
#   NMZ.field.<fieldname>   - the field values; each offset points at the
#                             start of one line
class NamazuField
  # nmzdirname:: index directory; it is concatenated as-is, so it must
  #              already end with a path separator
  # fieldname::  field name, i.e. the suffix after "NMZ.field."
  def initialize(nmzdirname, fieldname)
    @filename = nmzdirname + 'NMZ.field.' + fieldname
  end

  # Returns the line stored for +docid+ (line terminator included), or nil
  # when the offset table has no complete entry for +docid+ or the offset
  # points at the end of the value file.
  #
  # Raises the usual Errno::* errors when either file cannot be opened.
  def gets(docid)
    offset = nil
    # File.open (not Kernel#open) so the name is only ever treated as a path.
    File.open(@filename + '.i', 'rb') do |indexfile|
      indexfile.seek(docid * 4, IO::SEEK_SET)
      # IO#read returns nil at end of file, whereas sysread raises EOFError;
      # using read is what makes the "no entry" case reachable.
      entry = indexfile.read(4)
      offset = entry.unpack('N')[0] if entry
    end
    return nil unless offset

    # Opened in the default (text) mode so the returned string keeps the
    # encoding callers have always received.
    File.open(@filename) do |entityfile|
      entityfile.seek(offset, IO::SEEK_SET)
      entityfile.gets
    end
  end
end
# Bundles one NamazuField reader per field name so that several fields of a
# document can be fetched with a single call.
class NamazuFieldAll
  # Field names this instance was built for.
  attr_reader :fieldnames

  # nmzdirname:: index directory, passed unchanged to NamazuField.new
  # fieldnames:: array of field names to serve; when omitted, the directory
  #              is scanned and every entry named "NMZ.field.<name>.i"
  #              contributes <name>
  def initialize(nmzdirname, fieldnames = nil)
    unless fieldnames
      fieldnames = []
      Dir.foreach(nmzdirname) do |file|
        fieldnames.push($1) if /^NMZ\.field\.(.+)\.i$/ =~ file
      end
    end
    @fieldnames = fieldnames
    @nmzfieldall = {}
    @fieldnames.each do |fieldname|
      @nmzfieldall[fieldname] = NamazuField.new(nmzdirname, fieldname)
    end
  end

  # Returns a Hash mapping each requested field name to that field's value
  # for +docid+, with the trailing line terminator removed.
  #
  # docid::      document id handed to each field reader
  # fieldnames:: subset of fields to fetch; defaults to all known fields
  #
  # A field whose reader returns nil is reported as nil instead of raising
  # NoMethodError.
  def gets(docid, fieldnames = nil)
    fieldnames ||= @fieldnames
    result = {}
    fieldnames.each do |fieldname|
      field = @nmzfieldall[fieldname].gets(docid)
      # The reader may legitimately return nil; only strip real strings.
      field = field.chomp if field
      result[fieldname] = field
    end
    result
  end
end
# Looks up words in a Namazu index directory and produces per-document
# scores for one or more keywords.
#
# Files read, all inside the directory given to the constructor:
#   NMZ.w  / NMZ.wi - word list (one word per line) and its table of 4-byte
#                     big-endian line offsets
#   NMZ.i  / NMZ.ii - posting records and their table of 4-byte big-endian
#                     record offsets, indexed by word id
#   NMZ.t           - one 4-byte big-endian value per document id
class NamazuScores
  # nmzdirname:: index directory; concatenated as-is, so it must already end
  #              with a path separator
  def initialize(nmzdirname)
    @wordfile = nmzdirname + 'NMZ.w'
    @indexfile = nmzdirname + 'NMZ.i'
    @timefile = nmzdirname + 'NMZ.t'
    # Number of 4-byte entries in NMZ.t, kept as a Float so the division in
    # the idf computation is a floating point division.
    @docnum = (File.size(@timefile) / 4).to_f
  end

  # Binary-searches the word list for +word+ and returns its position (the
  # word id), or nil when the word is absent.
  #
  # Also returns nil when +word+ is nil, when an entry cannot be read, or
  # when an entry cannot be compared with +word+; previously the last case
  # left the search bounds untouched and the loop never terminated.
  def get_wordid(word)
    return nil unless word

    low = 0
    high = File.size(@wordfile + 'i') / 4 - 1
    return nil if high < low

    # Both files are opened once for the whole search rather than once per
    # probe.
    File.open(@wordfile + 'i', 'rb') do |offsets|
      File.open(@wordfile) do |words|
        while low <= high
          middle = (low + high) / 2
          offsets.seek(middle * 4, IO::SEEK_SET)
          entry = offsets.read(4)
          offset = entry && entry.unpack('N')[0]
          break unless offset

          words.seek(offset, IO::SEEK_SET)
          candidate = words.gets
          break unless candidate

          case candidate.chomp <=> word
          when 0 then return middle
          when 1 then high = middle - 1
          when -1 then low = middle + 1
          else break # not comparable: give up instead of spinning
          end
        end
      end
    end
    nil
  end

  # Decodes a string of BER-compressed integers (high bit set = more bytes
  # follow, 7 payload bits per byte) into an Array of Integers. Equivalent
  # to String#unpack('w*'); kept for callers that use it directly.
  def unpack_w(wstring)
    results = []
    result = 0
    wstring.each_byte do |tmpbyte|
      if tmpbyte < 128
        result += tmpbyte
        results.push(result)
        result = 0
      else
        result += (tmpbyte - 128)
        result *= 128
      end
    end
    results
  end

  # Returns [scores, times] for the word with id +wordid+; both are Hashes
  # keyed by document id. Returns two empty Hashes when +wordid+ is nil or
  # has no entry in the offset table.
  #
  # A posting record is a BER-compressed byte length followed by that many
  # bytes of BER-compressed integers, read as (document id delta, score)
  # pairs. Documents whose NMZ.t value is negative are left out.
  #
  # Raises EOFError when the record or NMZ.t is shorter than the index
  # claims.
  def get_scores_by_wordid(wordid)
    results = {}
    times = {}
    return [results, times] unless wordid

    offset = nil
    File.open(@indexfile + 'i', 'rb') do |offsets|
      offsets.seek(wordid * 4, IO::SEEK_SET)
      entry = offsets.read(4)
      offset = entry.unpack('N')[0] if entry
    end
    return [results, times] unless offset

    postings = nil
    File.open(@indexfile, 'rb') do |records|
      records.seek(offset, IO::SEEK_SET)
      # Decode the length prefix byte by byte. Bytes are turned into
      # Integers explicitly because String#[] returns a String on Ruby >= 1.9
      # and cannot be compared with 128.
      length = 0
      loop do
        byte = records.sysread(1).unpack('C')[0]
        length = length * 128 + (byte & 0x7f)
        break if byte < 128
      end
      postings = records.sysread(length).unpack('w*')
    end

    docid = 0
    File.open(@timefile, 'rb') do |timestamps|
      0.step(postings.length - 2, 2) do |i|
        docid += postings[i] # document ids are stored as running deltas
        timestamps.seek(docid * 4, IO::SEEK_SET)
        time = timestamps.sysread(4).unpack('N')[0]
        # 'N' is unsigned, so reinterpret as signed 32-bit to make the
        # negative check below meaningful.
        # NOTE(review): presumably Namazu stores -1 here for deleted
        # documents - confirm against the index format.
        time -= 0x100000000 if time >= 0x80000000
        next if time < 0

        results[docid] = postings[i + 1]
        times[docid] = time
      end
    end
    [results, times]
  end

  # Scores the documents matching +keywords+ (an Array of words).
  #
  # Returns [scores, times, references]:
  # scores::     Hash docid => score. With fewer than two keywords these are
  #              the stored scores of the first keyword. With two or more,
  #              only documents containing every keyword remain and each
  #              keyword contributes (stored score * idf) + 1.
  # times::      Hash docid => NMZ.t value, merged over all keywords (so it
  #              can contain documents that are not in +scores+)
  # references:: Array with the number of matching documents per keyword
  #
  # An empty +keywords+ Array yields [{}, {}, [0]].
  def get_scores(keywords)
    scores, times = get_scores_by_wordid(get_wordid(keywords[0]))
    return [scores, times, [scores.length]] if keywords.length < 2

    docids = scores.keys
    references = [docids.length]
    idf = inverse_document_frequency(docids.length)
    result = {}
    docids.each { |docid| result[docid] = (scores[docid] * idf) + 1 }

    keywords[1..-1].each do |keyword|
      scores, newtimes = get_scores_by_wordid(get_wordid(keyword))
      matched = scores.keys
      references.push(matched.length)
      idf = inverse_document_frequency(matched.length)
      common = docids & matched
      # Drop documents that lack this keyword, then add this keyword's
      # contribution to the survivors.
      (docids - common).each { |docid| result.delete(docid) }
      common.each { |docid| result[docid] += (scores[docid] * idf) + 1 }
      times.update(newtimes)
      docids = common
    end
    [result, times, references]
  end

  private

  # log2(total documents / documents containing the word).
  def inverse_document_frequency(reference)
    Math.log(@docnum / reference) / Math.log(2)
  end
end
# Loads a namazurc-style configuration file into a Hash.
#
# Each line has everything from the first '#' (plus the whitespace in front
# of it) removed. A "REPLACE <from> <to>" line (any letter case) appends
# [from, to] to the Array stored under 'replace'; any other "<name> <value>"
# line is stored under the lower-cased name.
class NamazuConfig
  # The parsed settings Hash.
  attr_reader :config

  # nmzconfname:: path of the file to read. When omitted, the candidates
  #               $HOME/.namazurc, $NAMAZURC and
  #               /usr/local/etc/namazurc/namazurc are examined in that
  #               order and the first one that is a regular file is used
  #               (falling back to the last candidate). An unreadable file
  #               leaves the configuration empty.
  def initialize(nmzconfname = nil)
    @config = {}
    if !nmzconfname
      candidates = []
      candidates << (ENV['HOME'] + '/.namazurc') if ENV['HOME']
      candidates << ENV['NAMAZURC'] if ENV['NAMAZURC']
      candidates << '/usr/local/etc/namazurc/namazurc'
      nmzconfname = candidates.find { |path| FileTest::file?(path) } || candidates.last
    end
    return unless FileTest::readable?(nmzconfname)

    open(nmzconfname) do |conffile|
      conffile.each_line do |raw|
        comment_at = raw.index(/\s*\#/)
        text = comment_at ? raw[0, comment_at] : raw
        if (found = /REPLACE\s+(\S+)\s+(\S+)/i.match(text))
          (@config['replace'] ||= []).push([found[1], found[2]])
        elsif (found = /(\w+)\s+(.+)/.match(text))
          @config[found[1].downcase] = found[2]
        end
      end
    end
  end

  # Prints every setting to standard output as "Name<TAB>: value". Array
  # values produce one line per element; elements that are themselves
  # Arrays are joined with tabs.
  def show_config
    @config.each do |name, value|
      label = name.capitalize + "\t: "
      if value.kind_of?(Array)
        value.each do |val|
          puts(label + (val.kind_of?(Array) ? val.join("\t") : val))
        end
      else
        puts(label + value)
      end
    end
  end
end
class Namazu
# Prepares a searcher over one Namazu index directory.
#
# nmzdirname:: index directory; when omitted, the 'index' setting of the
#              loaded configuration is used. A trailing path separator is
#              appended if missing.
# options::    Hash of string-keyed options. 'config' names the
#              configuration file handed to NamazuConfig; a truthy
#              'show-config' prints the configuration and exits the process.
#
# Templates are looked up in the configured 'template' directory, or in the
# index directory when no such setting exists.
def initialize(nmzdirname = nil, options = Hash.new())
  @options = options
  @scores = nil
  @times = nil
  @keywords = nil
  @references = nil
  @config = nil
  @docids = nil
  @hits = nil

  settings = NamazuConfig.new(@options['config'])
  if @options['show-config']
    settings.show_config
    exit
  end
  @config = settings.config

  # Returns +path+ itself when it already ends with the separator, otherwise
  # a new string with the separator appended.
  with_separator = lambda do |path|
    path[-1] == File::Separator[0] ? path : path + File::Separator
  end

  nmzdirname ||= @config['index']
  nmzdirname = with_separator.call(nmzdirname)
  @nmzscores = NamazuScores.new(nmzdirname)
  @nmzfieldall = NamazuFieldAll.new(nmzdirname)

  template_dir = @config['template']
  @templatedirname = template_dir ? with_separator.call(template_dir) : nmzdirname
end
# Read-only accessors for the instance variables of the same names.
attr_reader :scores, :times, :keywords, :references,
            :config, :docids, :hits
# Runs +keywords+ through the score engine and stores the outcome in
# @keywords, @scores, @times, @references, @docids (the keys of the score
# Hash) and @hits (their count). Returns the hit count.
def search(keywords)
  @keywords = keywords
  outcome = @nmzscores.get_scores(keywords)
  @scores, @times, @references = outcome
  matching = @scores.keys
  @docids = matching
  @hits = matching.length
end
# Reorders @docids in place according to @options.
#
# The ids are first sorted by descending score. Then, in this order:
# 'late'::      re-sort by descending time
# 'early'::     re-sort by ascending time
# 'sort'::      a value containing "date" re-sorts by descending time
#               (reversed if it also contains "early"); otherwise a value
#               containing "field:<name>" re-sorts by that field in
#               descending order, numerically for the field "size". In
#               either case the list is reversed if the value contains
#               "ascending".
# 'ascending':: reverse the final list
def docids_sort!()
  score_descending = proc { |x, y| @scores[y] <=> @scores[x] }
  time_descending = proc { |x, y| @times[y] <=> @times[x] }
  time_ascending = proc { |x, y| @times[x] <=> @times[y] }

  @docids.sort!(&score_descending)
  @docids.sort!(&time_descending) if @options['late']
  @docids.sort!(&time_ascending) if @options['early']

  sort_spec = @options['sort']
  if sort_spec
    if /date/i =~ sort_spec
      @docids.sort!(&time_descending)
      @docids.reverse! if /early/i =~ sort_spec
    elsif /field:([^:]*)(:.*)?/i =~ sort_spec
      field = $~[1]
      # Fetch the fields of every hit once, up front, so the comparator
      # below only does Hash lookups.
      fields_by_doc = {}
      @docids.each { |docid| fields_by_doc[docid] = @nmzfieldall.gets(docid) }
      if field == 'size'
        @docids.sort! do |x, y|
          fields_by_doc[y][field].to_i <=> fields_by_doc[x][field].to_i
        end
      else
        @docids.sort! do |x, y|
          fields_by_doc[y][field] <=> fields_by_doc[x][field]
        end
      end
    end
    @docids.reverse! if /ascending/i =~ sort_spec
  end
  @docids.reverse! if @options['ascending']
end
def read_template(nmztemplatename)
nmztemplate = nil
open(@templatedirname + nmztemplatename) do |file|
nmztemplate = file.read()
end
nmztemplate.gsub!(/
/ip, '') unless @options['form']
if @keywords
nmztemplate.gsub!(/<\/title>/i, ': <' + @keywords.join(' ') + '>\&')
end
nmztemplate.gsub!(/\{cgi\}/i, File::basename($0))
nmztemplate.gsub!(/\$\{(namazu::)?([^\}]+)\}/, '#{\2}')
if @keywords
nmztemplate.gsub!(/(]+name="query"[^>]*)>/ip,
'\1 value="'+ @keywords.join(' ') + '">')
end
@options.each() do |key, value|
nmztemplate.gsub!(/