Class: Sweeper

Child modules and classes

Class Sweeper::Problem

Constants

Name | Value
BASIC_KEYS ['artist', 'title', 'url']
GENRE_KEYS ['genre', 'comment']
ALBUM_KEYS ['album', 'track']
GENRES ID3Lib::Info::Genres
GENRE_COUNT 10
DEFAULT_GENRE {'genre' => 'Other', 'comment' => 'other'}

Attributes

Name | Read/write?
options R

Public Class Methods


new (options = {})

Instantiate a new Sweeper. See bin/sweeper for options details.

    # File lib/sweeper.rb, line 38
# Build a Sweeper from an options hash.
#
# options - Hash of settings; only 'dir' is read here (defaults to the
#           current working directory). The hash itself is kept as @options.
#
# Sets up the working directory, the platform's null device name, a
# temporary file handle (@errf) and an empty genre match cache.
def initialize(options = {})
  on_windows = RUBY_PLATFORM =~ /win32/

  root = File.expand_path(options['dir'] || Dir.pwd)
  root = root[2..-1] if on_windows # Strip drive letter
  @dir = root
  @null = on_windows ? "nul" : "/dev/null"

  @options = options
  @errf = Tempfile.new("stderr")
  @match_cache = {}
end

Public Instance Methods


binary ()

Returns the path to the fingerprinter binary for this platform.

     # File lib/sweeper.rb, line 249
# Path to the fingerprinter executable for the current RUBY_PLATFORM.
#
# The result is computed once and memoized in @binary. On win32 the path is
# taken from RUBYSCRIPT2EXE's appdir when that constant is defined; every
# other case points into the vendor directory next to this file.
def binary
  return @binary if @binary

  vendor = "#{File.expand_path(File.dirname(__FILE__))}/../vendor"
  @binary =
    if RUBY_PLATFORM =~ /win32/
      if defined?(RUBYSCRIPT2EXE)
        packed = RUBYSCRIPT2EXE
        p [packed.tempdir, packed.userdir, packed.exedir, packed.appdir] if ENV['DEBUG']
        "#{packed.appdir}/../bin/lastfmfpclient.exe"
      else
        "#{vendor}/lastfm.fpclient.beta2.win32/lastfmfpclient.exe"
      end
    elsif RUBY_PLATFORM =~ /darwin/
      "#{vendor}/lastfm.fpclient.beta2.OSX-intel/lastfmfpclient"
    else
      "#{vendor}/lastfm.fpclient.beta2.linux-32/lastfmfpclient"
    end
end

load (filename)

Loads metadata for an mp3 file. Looks for which ID3 version is already populated, instead of just the existence of frames.

     # File lib/sweeper.rb, line 268
# Open the tag data of an mp3 file.
#
# filename - path of the file to open.
#
# Returns a new ID3Lib::Tag built with ID3Lib::V_ALL as the version selector.
def load(filename)
  versions = ID3Lib::V_ALL
  ID3Lib::Tag.new(filename, versions)
end

lookup (filename, tags = {})

Lookup all available remote metadata for an mp3 file. Accepts a pathname and an optional hash of existing tags. Returns a tag hash.

     # File lib/sweeper.rb, line 133
# Gather remote metadata for an mp3 file and combine it with existing tags.
#
# filename - path of the mp3 file.
# tags     - Hash of tags already present (not modified; a copy is used).
#
# Basic tags are looked up when options['force'] is set or any BASIC_KEYS
# entry is missing. Genre tags are looked up only when options['genre'] is
# set, and then when forced or any GENRE_KEYS entry is missing.
#
# Returns the merged tag hash. Existing tags win unless options['force']
# (all remote tags win) or options['genre'] == 'force' (remote genre tags win).
def lookup(filename, tags = {})
  existing = tags.dup
  remote = {}

  force_all = options['force']
  force_genre = options['genre'] == 'force'

  # Are there any empty basic tags we need to lookup?
  if force_all || (BASIC_KEYS - existing.keys).any?
    remote.merge!(lookup_basic(filename))
  end

  # Are there any empty genre tags we need to lookup?
  if options['genre'] && (force_all || force_genre || (GENRE_KEYS - existing.keys).any?)
    remote.merge!(lookup_genre(remote.merge(existing)))
  end

  if force_all
    # Force all remote tags.
    existing.merge!(remote)
  elsif force_genre
    # Force remote genre tags only.
    existing.merge!(remote.slice(*GENRE_KEYS))
  end

  # Merge back in existing tags.
  remote.merge(existing)
end

lookup_basic (filename)

Lookup the basic metadata for an mp3 file. Accepts a pathname. Returns a tag hash.

     # File lib/sweeper.rb, line 162
# Lookup the basic metadata for an mp3 file by running the fingerprinter
# binary on it and parsing the XML it prints to stdout.
#
# filename - path of the mp3 file to fingerprint.
#
# Returns a hash with whichever BASIC_KEYS the first returned track responds
# to (may be empty).
# Raises Problem when the binary cannot be run, the response cannot be
# parsed, or parsing yields nothing.
def lookup_basic(filename)
  Dir.chdir File.dirname(binary) do
    # Argument-array form: the child is started without a shell, so quotes,
    # `$`, backticks and other metacharacters in the filename reach the
    # binary literally instead of being expanded or executed.
    cmd = [binary, filename]
    p cmd if ENV['DEBUG']

    object = begin
      # The binary's stderr is discarded, as before.
      response = IO.popen(cmd, :err => @null) { |io| io.read }
      XSD::Mapping.xml2obj(response)
    rescue StandardError => e
      # StandardError only: an Interrupt or SystemExit must not be turned
      # into a Problem, which the caller would retry.
      raise Problem, "#{e.class.name} - #{e.message}"
    end
    raise Problem, "Fingerprint failed" unless object

    tags = {}
    song = Array(object.track).first

    BASIC_KEYS.each do |key|
      tags[key] = song.send(key) if song.respond_to? key
    end
    tags
  end
end

lookup_genre (tags)

Lookup the genre metadata for a set of basic metadata. Accepts a tag hash. Returns a genre tag hash.

     # File lib/sweeper.rb, line 185
# Lookup the genre metadata for a set of basic metadata.
#
# tags - tag hash; only tags['artist'] is read.
#
# Fetches the artist's top tags XML, keeps the first GENRE_COUNT tag names,
# maps each to its nearest known genre and picks the best-weighted one.
#
# Returns a hash with 'genre' (the winning genre) and 'comment' (the fetched
# tag names joined by ", "). Returns DEFAULT_GENRE when the artist is blank,
# when the request or parse fails, or when no tags come back.
#
# NOTE(review): relies on open-uri's Kernel#open for URLs and on URI.encode;
# newer Rubies removed both, in which case the rescue below silently yields
# DEFAULT_GENRE. Confirm the target Ruby version.
def lookup_genre(tags)
  return DEFAULT_GENRE if tags['artist'].blank?

  # StandardError rather than Object, so Ctrl-C (Interrupt) or exit during
  # the request is not swallowed and reported as a default genre.
  response = begin
    open("http://ws.audioscrobbler.com/1.0/artist/#{URI.encode(tags['artist'])}/toptags.xml").read
  rescue StandardError => e
    puts "Open-URI error: #{e.class.name} - #{e.message}" if ENV['DEBUG']
    return DEFAULT_GENRE
  end

  begin
    object = XSD::Mapping.xml2obj(response)
  rescue StandardError => e
    puts "XSD error: #{e.class.name} - #{e.message}" if ENV['DEBUG']
    return DEFAULT_GENRE
  end

  return DEFAULT_GENRE if !object.respond_to? :tag

  genres = Array(object.tag)[0..(GENRE_COUNT - 1)].map(&:name)
  return DEFAULT_GENRE if !genres.any?

  primary = nil
  genres.each_with_index do |this, index|
    match, weight = nearest_genre(this)
    # Bias slightly towards higher tagging counts. Float division is
    # required: with integer division the bonus was 0.25 for the first tag
    # and 0 for every other one.
    weight += (GENRE_COUNT - index) / GENRE_COUNT.to_f / 4.0

    if ['Rock', 'Pop', 'Rap'].include? match
      # Penalize useless genres
      weight = weight / 3.0
    end

    p [weight, match] if ENV['DEBUG']

    if !primary or primary.first < weight
      primary = [weight, match]
    end
  end

  {'genre' => primary.last, 'comment' => genres.join(", ")}
end

nearest_genre (string)

     # File lib/sweeper.rb, line 272
# Find the entry of GENRES with the smallest Levenshtein distance to +string+.
#
# string - the tag name to match.
#
# Returns a two-element array [genre, weight], where weight comes from
# normalize. Results are memoized per string in @match_cache.
def nearest_genre(string)
  @match_cache[string] ||= begin
    # Scan GENRES back to front: min_by keeps the first minimum it meets, so
    # among equally distant genres the one listed last in GENRES wins.
    scored = GENRES.reverse_each.map do |genre|
      [Text::Levenshtein.distance(genre, string), genre]
    end
    distance, match = scored.min_by(&:first)

    [match, normalize(match, string, distance)]
  end
end

normalize (genre, string, weight)

     # File lib/sweeper.rb, line 285
# Turn an edit distance into a similarity score between 0.0 and 1.0.
#
# genre  - the matched genre name.
# string - the string that was compared against it.
# weight - the edit distance between the two.
#
# Returns 1.0 for a zero distance, 0.0 once the distance reaches the genre's
# length, and otherwise 1 minus the distance divided by the longer length.
def normalize(genre, string, weight)
  # XXX Algorithm may not be right
  return 1.0 if weight == 0
  return 0.0 if weight >= genre.size

  longer = genre.size >= string.size ? genre.size : string.size
  1.0 - (weight / longer.to_f)
end

read (filename)

Read tags from an mp3 file. Returns a tag hash.

     # File lib/sweeper.rb, line 115
# Read the existing tags of an mp3 file.
#
# filename - path of the mp3 file.
#
# Returns a hash with every BASIC_KEYS / GENRE_KEYS entry whose value on the
# loaded song is not blank. A genre containing digits is replaced by the
# GENRES entry at that index.
def read(filename)
  song = load(filename)

  tags = {}
  (BASIC_KEYS + GENRE_KEYS).each do |key|
    value = song.send(key)
    tags[key] = value unless value.blank?
  end

  # Change numeric genres into TCON strings
  # XXX Might not work well
  tags['genre'] = GENRES[$1.to_i] if tags['genre'] =~ /(\d+)/

  tags
end

recurse (dir)

Recurse one directory, reading, looking up, and writing each file, if appropriate. Accepts a directory path.

     # File lib/sweeper.rb, line 78
 # Process one directory: read, look up and, when the tags changed, write
 # every .mp3 file in it. Subdirectories are descended into only when
 # options['recursive'] is set.
 #
 # dir - path of the directory to process.
 #
 # Updates the @read, @updated and @failed counters. A file whose processing
 # raises Problem is retried twice before being counted as failed.
 def recurse(dir)
   # Hackishly avoid problems with metacharacters in the Dir[] string.
   pattern = dir.gsub(/[^\s\w\.\/\\\-]/, '?')
   p pattern if ENV['DEBUG']

   Dir["#{pattern}/*"].each do |filename|
     if File.directory?(filename) && options['recursive']
       recurse(filename)
       next
     end
     next unless File.extname(filename) =~ /\.mp3$/i

     @read += 1
     attempts = 0
     begin
       current = read(filename)
       updated = lookup(filename, current)

       p current, updated if ENV['DEBUG']

       if updated == current
         # Don't bother updating identical metadata.
         puts "Unchanged: #{File.basename(filename)}"
       else
         write(filename, updated)
         @updated += 1
       end
     rescue Problem => e
       if attempts < 2
         attempts += 1
         retry
       end
       puts "Skipped (#{e.message.gsub("\n", " ")}): #{File.basename(filename)}"
       @failed += 1
     end
   end
 end

run ()

Run the Sweeper according to the options.

    # File lib/sweeper.rb, line 54
# Run the Sweeper over @dir.
#
# Resets the @read/@updated/@failed counters, registers an at_exit hook that
# prints a summary (or a hint when no mp3 files were read), then recurses
# through @dir. Any exception is reported as "Unknown error"; it is re-raised
# when ENV['DEBUG'] is set, otherwise the process exits.
def run
  @read = @updated = @failed = 0

  Kernel.at_exit do
    summary =
      if @read == 0
        "No mp3 files found. Maybe you meant --recursive?"
      else
        "Read: #{@read}\nUpdated: #{@updated}\nFailed: #{@failed}"
      end
    puts summary
  end

  recurse(@dir)
rescue Object => e
  puts "Unknown error: #{e.inspect}"
  ENV['DEBUG'] ? raise : exit
end

write (filename, tags)

Write tags to an mp3 file. Accepts a pathname and a tag hash.

     # File lib/sweeper.rb, line 229
# Write tags to an mp3 file and print what was set.
#
# filename - path of the mp3 file.
# tags     - Hash of tag name => value; each is assigned through the song's
#            "<name>=" writer. Nothing happens when the hash is empty.
#
# The song's current ALBUM_KEYS values are printed as well. The file is only
# saved (update! with ID3Lib::V2) when options['dry-run'] is not set.
def write(filename, tags)
  return if tags.empty?

  puts "Updated: #{File.basename(filename)}"
  song = load(filename)

  tags.each_pair do |name, value|
    song.send("#{name}=", value)
    puts "  #{name.capitalize}: #{value}"
  end

  ALBUM_KEYS.each { |name| puts "  #{name.capitalize}: #{song.send(name)}" }

  song.update!(ID3Lib::V2) unless options['dry-run']
end
246:   end