Module Algorithms::Rank::RSVD
In: lib/algorithms/rank/rsvd.rb

Methods

Included Modules

Linalg RsvdFast

Constants

FILE_PREFIX = File.dirname(__FILE__) + '/rsvd/bin/rsvd_save'
Inf = 1.0/0

Public Instance methods

[Source]

    # File lib/algorithms/rank/rsvd.rb, line 61
61:   def load_solution(question_id)
62:     name = name_for_question(question_id)
63:     Marshal.load(File.open(name, 'r') { |file| file.read }) if File.exists?(name)
64:   end

[Source]

    # File lib/algorithms/rank/rsvd.rb, line 10
10:   def row_list(question_id, row = 1)
11:     sol = load_solution(question_id)
12:     if sol && sol.shift
13:       h, v, p, items = sol
14:       p.row(row).to_a.flatten.zip(items).sort_by { |el| -el[0] }
15:     end
16:   end

[Source]

    # File lib/algorithms/rank/rsvd.rb, line 18
18:   def svd(question_id, start_k = 0, end_k = 10, h = 0.001)
19:     old_sol = stored_solution(question_id)
20:     num_prompts = old_sol && old_sol.shift
21:     if num_prompts == true
22:       old_sol
23:     else
24:       num_prompts ||= ::Prompt.count(:conditions => { :question_id => question_id })
25:       votes, visits, items = make_vote_data(question_id)
26:       v_len = visits.length
27:       p_len = items.length
28:       if old_sol
29:         h, v_old, p_old = old_sol
30:       else
31:         tmp, h, v_old, p_old = load_solution(question_id) || [
32:           nil, h,
33:           DMatrix.new(v_len, start_k + 1) { rand },
34:           DMatrix.new(start_k + 1, p_len) { rand }
35:         ]
36:       end
37:       tmp = v_old
38:       v_old = DMatrix.new(v_len, start_k + 1)
39:       0.upto(start_k) { |i| v_old.replace_column(i, tmp.col(i)) }
40:       tmp = p_old
41:       p_old = DMatrix.new(start_k + 1, p_len)
42:       0.upto(start_k) { |i| p_old.replace_row(i, tmp.row(i)) }
43: 
44:       for i in start_k...end_k do
45:         scores = v_old * p_old
46:         puts "k: #{i} of #{end_k}, h: #{h}, dims: #{scores.dimensions.inspect}"
47:         res = matrix_approximation(scores.to_a.flatten, v_len, p_len, votes.values, h)
48:         break unless res
49:         v = res.first(v_len)
50:         p = res.last(p_len)
51:         v = DMatrix.new(v_len, 1) { |i,j| v[i] }
52:         p = DMatrix.new(1, p_len) { |i,j| p[j] }
53:         v_old = DMatrix.join_columns(v_old.columns.map { |x| x } + v.columns.map { |x| x })
54:         p_old = DMatrix.join_rows(p_old.rows.map { |x| x } + p.rows.map { |x| x })
55:         save_solution(question_id, num_prompts, h, v_old, p_old, items, visits)
56:       end
57:       [h, v_old, p_old, items, visits]
58:     end
59:   end

Private Instance methods

Ruby version of descent, slower than C version.

Return

Approximate matrices v and p

Parameters

scores_old<DMatrix>:matrix of dot product of old v,p
votes<Hash>:hash of visits to array of [winner, loser] arrays fo

pairs visitor voted on.

h<Float>:step size

[Source]

     # File lib/algorithms/rank/rsvd.rb, line 75
 75:     def _ruby_matrix_approximation(scores_old, votes, h = 0.001) #:doc:
 76:       v_len = scores_old.vsize
 77:       p_len = scores_old.hsize
 78: 
 79:       v = DMatrix.new(v_len, 1, 0.5)
 80:       p = DMatrix.new(1, p_len, 0.1)
 81: 
 82:       old_num_errs = [Inf] * 10
 83:       old_err = [Inf] * 10
 84:       stop = false
 85: 
 86:       until stop do
 87:         scores = v * p
 88:         dv = DMatrix.new(v.vsize, v.hsize, 0)
 89:         dp = DMatrix.new(p.vsize, p.hsize, 0)
 90: 
 91:         err = 0
 92:         num_errs = 0
 93: 
 94:         for i in 0...votes.length
 95:           for winner, loser in votes[i]
 96:             winner_val = scores_old[i, winner] + scores[i, winner]
 97:             loser_val = scores_old[i, loser] + scores[i, loser]
 98: 
 99:             e = winner_val - loser_val
100:             num_errs -= 1 if e > 0
101:             e = 1 - e
102:             err += e * e
103:             num_errs += 1
104: 
105:             dv[i] += h*2*e*(p[0,winner] - p[0,loser])
106:             adj = h*2*e*v[i,0]
107:             dp[0,loser] -= adj
108:             dp[0,winner] += adj
109:           end
110:         end
111: 
112:         puts "[#{num_errs}, #{err}]"
113: 
114:         v += dv
115:         p += dp
116: 
117:         old_num_errs << num_errs
118:         old_err << err
119: 
120:         stop = err_window(old_num_errs) && err_window(old_err)
121: 
122:         old_num_errs.shift
123:         old_err.shift
124:       end
125:       [v, p]
126:     end

Make vote data from question id. Note that this vote data format will work with the C version of matrix_approximation but NOT with the Ruby version.

Return

votes hash, visits array, and items array as an array.

Parameters

question_id<int>:ID of question to construct vote data for.

[Source]

     # File lib/algorithms/rank/rsvd.rb, line 139
139:     def make_vote_data(question_id) #:doc:
140:       result = ActiveRecord::Base.connection.execute(
141:         "SELECT items_votes.item_id AS winner_id, items_prompts.item_id AS
142:           loser_id, votes.tracking FROM votes INNER JOIN prompts ON (prompts.id=votes.prompt_id
143:           AND prompts.question_id=#{question_id}) INNER JOIN items_votes ON
144:           (items_votes.vote_id=votes.id) INNER JOIN items_prompts ON
145:           items_prompts.prompt_id=prompts.id AND items_prompts.item_id!=
146:           items_votes.item_id"
147:       )
148:       votes = {}
149:       visits = []
150:       items = []
151:       # use votes.tracking to distinguish visitors
152:       while rec = result.fetch_hash
153:         winner_id, loser_id = rec['winner_id'], rec['loser_id']
154:         for id in [winner_id, loser_id]
155:           items << id unless items.include?(id)
156:         end
157:         winner = items.index(winner_id)
158:         loser = items.index(loser_id)
159: 
160:         tracking = rec['tracking']
161:         unless visits.include?(tracking)
162:           votes[visits.length] = []
163:           visits << tracking
164:         end
165:         visit = visits.index(tracking)
166:         #          votes[visit] << [winner, loser]
167:         votes[visit] << winner << loser
168:       end
169:       result.free
170:       [votes, visits, items]
171:     end

Use stored solution unless no stored solution or solution is more than 1 day old and number of new prompts greater than 105% of old number of prompts.

Return

If described conditions met returns stored solution, otherwise false or nil.

Parameters

question_id<int>:question ID.

[Source]

     # File lib/algorithms/rank/rsvd.rb, line 191
191:     def stored_solution(question_id) #:doc:
192:       name = name_for_question(question_id)
193:       if File.exists?(name) && Time.now - File.mtime(name) < 1.day
194:         num_prompts = ::Prompt.count(:conditions => { :question_id => question_id })
195:         sol = load_solution(question_id)
196:         sol[0] = num_prompts.to_f > sol[0] * 1.05 ? num_prompts : true
197:         sol
198:       end
199:     end

[Validate]