File: active_support/multibyte/unicode_database.rb

Overview
Module Structure
Class Hierarchy
Code

Overview

Module Structure

  module: <Toplevel Module>
  module: ActiveSupport#3
  module: Multibyte#4
has properties
constant: UCD #69
  class: Codepoint#6
inherits from
  Object ( Builtin-Module )
has properties
attribute: code [RW] #7
attribute: combining_class [RW] #7
attribute: decomp_type [RW] #7
attribute: decomp_mapping [RW] #7
attribute: uppercase_mapping [RW] #7
attribute: lowercase_mapping [RW] #7
  class: UnicodeDatabase#11
inherits from
  Object ( Builtin-Module )
has properties
constant: ATTRIBUTES #12
method: initialize #16
method: load #35
method: === / 1 #45
class method: dirname #58
class method: filename #63

Code

   1  # encoding: utf-8
   2 
   3  module ActiveSupport #:nodoc:
   4    module Multibyte #:nodoc:
   5      # Holds data about a codepoint in the Unicode database
   6      class Codepoint
   7        attr_accessor :code, :combining_class, :decomp_type, :decomp_mapping, :uppercase_mapping, :lowercase_mapping
   8      end
   9 
  10      # Holds static data from the Unicode database
  11      class UnicodeDatabase
  12        ATTRIBUTES = :codepoints, :composition_exclusion, :composition_map, :boundary, :cp1252
  13 
  14        attr_writer(*ATTRIBUTES)
  15 
  16        def initialize
  17          @codepoints = Hash.new(Codepoint.new)
  18          @composition_exclusion = []
  19          @composition_map = {}
  20          @boundary = {}
  21          @cp1252 = {}
  22        end
  23 
  24        # Lazy load the Unicode database so it's only loaded when it's actually used
  25        ATTRIBUTES.each do |attr_name|
  26          class_eval(<<-EOS, __FILE__, __LINE__ + 1)
  27            def #{attr_name}  # def codepoints
  28              load            #   load
  29              @#{attr_name}   #   @codepoints
  30            end               # end
  31          EOS
  32        end
  33 
  34        # Loads the Unicode database and returns all the internal objects of UnicodeDatabase.
  35        def load
  36          begin
  37            @codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 = File.open(self.class.filename, 'rb') { |f| Marshal.load f.read }
  38          rescue Exception => e
  39              raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable")
  40          end
  41 
  42          # Redefine the === method so we can write shorter rules for grapheme cluster breaks
  43          @boundary.each do |k,_|
  44            @boundary[k].instance_eval do
  45              def ===(other)
  46                detect { |i| i === other } ? true : false
  47              end
  48            end if @boundary[k].kind_of?(Array)
  49          end
  50 
  51          # define attr_reader methods for the instance variables
  52          class << self
  53            attr_reader(*ATTRIBUTES)
  54          end
  55        end
  56 
  57        # Returns the directory in which the data files are stored
  58        def self.dirname
  59          File.dirname(__FILE__) + '/../values/'
  60        end
  61 
  62        # Returns the filename for the data file for this version
  63        def self.filename
  64          File.expand_path File.join(dirname, "unicode_tables.dat")
  65        end
  66      end
  67 
  68      # Unicode database: the UnicodeDatabase instance created when this file is loaded
  69      UCD = UnicodeDatabase.new
  70    end
  71  end