| Module | Algorithms::Rank::RSVD |
| In: |
lib/algorithms/rank/rsvd.rb
|
| FILE_PREFIX | = | File.dirname(__FILE__) + '/rsvd/bin/rsvd_save' |
| Inf | = | 1.0/0 |
# File lib/algorithms/rank/rsvd.rb, line 61
61: def load_solution(question_id)
62: name = name_for_question(question_id)
63: Marshal.load(File.open(name, 'r') { |file| file.read }) if File.exists?(name)
64: end
# File lib/algorithms/rank/rsvd.rb, line 10
10: def row_list(question_id, row = 1)
11: sol = load_solution(question_id)
12: if sol && sol.shift
13: h, v, p, items = sol
14: p.row(row).to_a.flatten.zip(items).sort_by { |el| -el[0] }
15: end
16: end
# File lib/algorithms/rank/rsvd.rb, line 18
18: def svd(question_id, start_k = 0, end_k = 10, h = 0.001)
19: old_sol = stored_solution(question_id)
20: num_prompts = old_sol && old_sol.shift
21: if num_prompts == true
22: old_sol
23: else
24: num_prompts ||= ::Prompt.count(:conditions => { :question_id => question_id })
25: votes, visits, items = make_vote_data(question_id)
26: v_len = visits.length
27: p_len = items.length
28: if old_sol
29: h, v_old, p_old = old_sol
30: else
31: tmp, h, v_old, p_old = load_solution(question_id) || [
32: nil, h,
33: DMatrix.new(v_len, start_k + 1) { rand },
34: DMatrix.new(start_k + 1, p_len) { rand }
35: ]
36: end
37: tmp = v_old
38: v_old = DMatrix.new(v_len, start_k + 1)
39: 0.upto(start_k) { |i| v_old.replace_column(i, tmp.col(i)) }
40: tmp = p_old
41: p_old = DMatrix.new(start_k + 1, p_len)
42: 0.upto(start_k) { |i| p_old.replace_row(i, tmp.row(i)) }
43:
44: for i in start_k...end_k do
45: scores = v_old * p_old
46: puts "k: #{i} of #{end_k}, h: #{h}, dims: #{scores.dimensions.inspect}"
47: res = matrix_approximation(scores.to_a.flatten, v_len, p_len, votes.values, h)
48: break unless res
49: v = res.first(v_len)
50: p = res.last(p_len)
51: v = DMatrix.new(v_len, 1) { |i,j| v[i] }
52: p = DMatrix.new(1, p_len) { |i,j| p[j] }
53: v_old = DMatrix.join_columns(v_old.columns.map { |x| x } + v.columns.map { |x| x })
54: p_old = DMatrix.join_rows(p_old.rows.map { |x| x } + p.rows.map { |x| x })
55: save_solution(question_id, num_prompts, h, v_old, p_old, items, visits)
56: end
57: [h, v_old, p_old, items, visits]
58: end
59: end
Ruby version of descent, slower than C version.
Approximate matrices v and p
| scores_old<DMatrix>: | matrix of dot product of old v,p |
| votes<Hash>: | hash of visits to array of [winner, loser] arrays fo |
pairs visitor voted on.
| h<Float>: | step size |
# File lib/algorithms/rank/rsvd.rb, line 75
75: def _ruby_matrix_approximation(scores_old, votes, h = 0.001) #:doc:
76: v_len = scores_old.vsize
77: p_len = scores_old.hsize
78:
79: v = DMatrix.new(v_len, 1, 0.5)
80: p = DMatrix.new(1, p_len, 0.1)
81:
82: old_num_errs = [Inf] * 10
83: old_err = [Inf] * 10
84: stop = false
85:
86: until stop do
87: scores = v * p
88: dv = DMatrix.new(v.vsize, v.hsize, 0)
89: dp = DMatrix.new(p.vsize, p.hsize, 0)
90:
91: err = 0
92: num_errs = 0
93:
94: for i in 0...votes.length
95: for winner, loser in votes[i]
96: winner_val = scores_old[i, winner] + scores[i, winner]
97: loser_val = scores_old[i, loser] + scores[i, loser]
98:
99: e = winner_val - loser_val
100: num_errs -= 1 if e > 0
101: e = 1 - e
102: err += e * e
103: num_errs += 1
104:
105: dv[i] += h*2*e*(p[0,winner] - p[0,loser])
106: adj = h*2*e*v[i,0]
107: dp[0,loser] -= adj
108: dp[0,winner] += adj
109: end
110: end
111:
112: puts "[#{num_errs}, #{err}]"
113:
114: v += dv
115: p += dp
116:
117: old_num_errs << num_errs
118: old_err << err
119:
120: stop = err_window(old_num_errs) && err_window(old_err)
121:
122: old_num_errs.shift
123: old_err.shift
124: end
125: [v, p]
126: end
Make vote data from question id. Note that this vote data format will work with the C version of matrix_approximation but NOT with the Ruby version.
votes hash, visits array, and items array as an array.
| question_id<int>: | ID of question to construct vote data for. |
# File lib/algorithms/rank/rsvd.rb, line 139
139: def make_vote_data(question_id) #:doc:
140: result = ActiveRecord::Base.connection.execute(
141: "SELECT items_votes.item_id AS winner_id, items_prompts.item_id AS
142: loser_id, votes.tracking FROM votes INNER JOIN prompts ON (prompts.id=votes.prompt_id
143: AND prompts.question_id=#{question_id}) INNER JOIN items_votes ON
144: (items_votes.vote_id=votes.id) INNER JOIN items_prompts ON
145: items_prompts.prompt_id=prompts.id AND items_prompts.item_id!=
146: items_votes.item_id"
147: )
148: votes = {}
149: visits = []
150: items = []
151: # use votes.tracking to distinguish visitors
152: while rec = result.fetch_hash
153: winner_id, loser_id = rec['winner_id'], rec['loser_id']
154: for id in [winner_id, loser_id]
155: items << id unless items.include?(id)
156: end
157: winner = items.index(winner_id)
158: loser = items.index(loser_id)
159:
160: tracking = rec['tracking']
161: unless visits.include?(tracking)
162: votes[visits.length] = []
163: visits << tracking
164: end
165: visit = visits.index(tracking)
166: # votes[visit] << [winner, loser]
167: votes[visit] << winner << loser
168: end
169: result.free
170: [votes, visits, items]
171: end
Use stored solution unless no stored solution or solution is more than 1 day old and number of new prompts greater than 105% of old number of prompts.
If described conditions met returns stored solution, otherwise false or nil.
| question_id<int>: | question ID. |
# File lib/algorithms/rank/rsvd.rb, line 191
191: def stored_solution(question_id) #:doc:
192: name = name_for_question(question_id)
193: if File.exists?(name) && Time.now - File.mtime(name) < 1.day
194: num_prompts = ::Prompt.count(:conditions => { :question_id => question_id })
195: sol = load_solution(question_id)
196: sol[0] = num_prompts.to_f > sol[0] * 1.05 ? num_prompts : true
197: sol
198: end
199: end