class Buryspam::Cache

Create cache files which make re-initialization faster. There are two types of cache files associated with each processed mbox: a metadata cache (file extension META_EXT) and a counts cache (file extension COUNTS_EXT).

The cache directory structure will mirror the good/bad mail directories specified for initialization in the configuration.

Constants

COUNTS_EXT

Extension name used for the counts cache.

META_EXT

Extension name used for the meta cache.

Attributes

counts[R]

Public Class Methods

new(gb, mbox_filename, init_date_range) click to toggle source

For the given good/bad mbox file, determine the two cache file names and load them.

# File buryspam.rb, line 4672
# Work out the metadata and counts cache file names for the given good/bad
# mbox file and load both caches.
#
# gb::              good/bad indicator, handed through to load_caches.
# mbox_filename::   path of the mbox file; its directory must start with
#                   ENV['HOME'].
# init_date_range:: date range used for initialization, kept for load_caches.
#
# Raises a RuntimeError if the mbox is not located under HOME, or if
# load_caches reports the cache files as corrupt.
def initialize(gb, mbox_filename, init_date_range)
  @status = Status.new("loading cache...")

  @mbox_mtime = File.mtime(mbox_filename)
  @init_date_range = init_date_range

  dir, file = File.split(mbox_filename)
  # Remove only a literal, leading HOME prefix.  HOME is escaped so that
  # regexp metacharacters in it (e.g. the "." in "/home/john.doe") match
  # themselves, and the pattern is anchored so that a later occurrence of
  # HOME inside the path is neither removed nor accepted as "in HOME".
  home_prefix = /\A#{Regexp.escape(ENV['HOME'].to_s)}/
  unless dir.sub!(home_prefix, "")
    raise mbox_filename + " not in HOME directory?"
  end

  # Mirror the mbox's HOME-relative directory underneath the cache directory.
  dir = File.join(Config.cache_dir, dir)
  FileUtils.ensure_dir_exists(dir)
  base = File.join(dir, '_' + file)

  @meta_file = base + META_EXT
  @counts_file = base + COUNTS_EXT

  begin
    load_caches(gb, mbox_filename)
  rescue CorruptError
    # Turn the internal error into a message telling the user how to recover.
    raise "\nMissing or corrupt cache files.\n" +
                   "Try reinitializing with '-i'\n"
  end
  @status.finish
end

Public Instance Methods

[](key) click to toggle source

Return the specified metadata element from the metadata cache.

# File buryspam.rb, line 4701
# Look up +key+ in the metadata cache.
#
# Returns the value stored under +key+; raises a RuntimeError when the
# metadata cache has no such key.
def [](key)
  unless @meta.has_key?(key)
    raise "metadata cache has no key '#{key}'!"
  end
  @meta[key]
end

Private Instance Methods

load_caches(gb, mbox_filename) click to toggle source

Load the cache files containing the various counts into memory. If a cache file doesn't exist or is outdated, then update it by (re)processing the corresponding mbox file. The logic for this method is convoluted because it tries to avoid reading/processing mbox files, if possible.

# File buryspam.rb, line 4714
# Bring @meta and @counts into memory from the cache files, rebuilding and
# re-saving them only when needed.  Reading the mbox file is the last resort.
#
# gb::            good/bad indicator passed on to the mbox metadata.
# mbox_filename:: mbox file to (re)read when the caches cannot be reused.
#
# Raises CorruptError when a counts cache that should exist cannot be loaded.
def load_caches(gb, mbox_filename)
  dirty = false
  @meta = Hashbase.load(@meta_file)

  if outdated?
    # Stale metadata: reread the mbox and rebuild both the metadata and the
    # counts caches.
    Logger.debug("Regenerating outdated cache.")
    @status.update("reading mbox...")
    mbox = Mbox.read_file(mbox_filename)
    @meta = mbox.metadata(gb, @init_date_range, @mbox_mtime)
    @status.update("processing: ")
    @counts = mbox.counts(@meta[:count_type])

    dirty = true
  else
    # Metadata is current, but date_range drift may call for a different
    # kind of counts cache.  Work out which kind is wanted now.
    wanted = Mbox.count_type(@init_date_range,
                             *@meta.values_at(:date_range, :num_msgs))
    Logger.debug do
      "current count cache type: #{@meta[:count_type].inspect}"
    end
    Logger.debug do
      "wanted count cache type: #{wanted.inspect}"
    end

    if wanted == @meta[:count_type]
      # Same kind as before: reuse the stored counts; nothing to save.
      Logger.debug("Using existing counts cache.")
      @counts = Hashbase.load(@counts_file)
      raise CorruptError if @counts.nil? && !wanted.nil?
    else
      # The kind changed: produce the new counts as cheaply as possible.
      if wanted == :total && @meta[:count_type] == :times
        # A :times count can be converted to a :total count without
        # rereading the mbox.
        Logger.debug("Converting time count to total count.")
        @status.update("converting count...")
        @counts = Mbox.total_count(Hashbase.load(@counts_file))
        raise CorruptError if @counts.nil?
      elsif wanted.nil?
        # Nothing to count.
        @counts = nil
      else
        # No shortcut available: count from the mbox itself.
        Logger.debug("Regenerating cache.")
        @status.update("reading mbox...")
        mbox = Mbox.read_file(mbox_filename)
        @status.update("processing: ")
        @counts = mbox.counts(wanted)
      end
      @meta[:count_type] = wanted
      dirty = true
    end
  end

  return unless dirty

  @status.update("saving cache...")
  Hashbase.save(@meta_file => @meta, @counts_file => @counts)
end
outdated?() click to toggle source

Returns true if the cache metadata file is outdated; false otherwise.

# File buryspam.rb, line 4784
# Tell whether the metadata cache can no longer be trusted.
#
# Returns true when no metadata was loaded, when it is empty, when its
# recorded :mtime differs from the mbox's mtime, or when any setting named
# in Config::META_CACHE differs from the value stored in the metadata;
# false otherwise.
def outdated?
  unusable = @meta.nil? || @meta.empty? || @meta[:mtime] != @mbox_mtime
  return true if unusable

  Config::META_CACHE.any? do |setting|
    @meta[setting.to_sym] != Config.send(setting)
  end
end