File: rexml/encodings/ISO-8859-15.rb

Overview
Module Structure
Code

Overview

Module Structure

  module: <Toplevel Module>
  module: REXML#4
  module: Encoding#5
has properties
alias: encode to_iso_8859_15 #7
alias: decode from_iso_8859_15 #8
method: to_iso_8859_15 / 1 #12
method: from_iso_8859_15 / 1 #51

Code

   1  #
   2  # This encoding was contributed by Mikko Tiihonen (mikko DOT tiihonen AT hut DOT fi)
   3  #
   4  module REXML
   5    module Encoding
   6      register("ISO-8859-15") do |o|
            # Point the generic encode/decode names at the ISO-8859-15
            # converters defined later in this module.
            # NOTE(review): `register` is defined outside this file, and the
            # block parameter `o` is never referenced here, so these aliases
            # are not explicitly scoped to `o`. Confirm this matches how
            # `register` invokes the block.
   7        alias encode to_iso_8859_15
   8        alias decode from_iso_8859_15
   9      end
  10 
  # Convert from UTF-8 to ISO-8859-15 (Latin-9).
  #
  # +content+ is split into code points with unpack('U*'). Each code point
  # becomes either a single output byte or, when ISO-8859-15 has no byte for
  # it, the ASCII bytes of a decimal numeric character reference (&#nnnn;).
  #
  # Returns the byte string produced by pack('C*').
  #
  # The eight code points whose ISO-8859-1 byte slot was reassigned used to
  # be appended as String literals, which pack('C*') cannot convert to a
  # byte; they are now expanded into the bytes of their numeric reference.
  def to_iso_8859_15(content)
    array_enc = []
    content.unpack('U*').each do |num|
      case num
      # Characters added compared to ISO-8859-1: each one takes over a byte
      # slot that ISO-8859-1 used for a different character.
      when 0x20AC then array_enc << 0xA4 # euro sign
      when 0x0160 then array_enc << 0xA6 # capital S with caron
      when 0x0161 then array_enc << 0xA8 # small s with caron
      when 0x017D then array_enc << 0xB4 # capital Z with caron
      when 0x017E then array_enc << 0xB8 # small z with caron
      when 0x0152 then array_enc << 0xBC # capital ligature OE
      when 0x0153 then array_enc << 0xBD # small ligature oe
      when 0x0178 then array_enc << 0xBE # capital Y with diaeresis
      # Characters removed compared to ISO-8859-1: their byte slot now means
      # something else, so they can only be written as a numeric reference.
      when 0xA4, 0xA6, 0xA8, 0xB4, 0xB8, 0xBC, 0xBD, 0xBE
        array_enc.concat(format('&#%d;', num).unpack('C*'))
      # All remaining code points up to 0xFF are used directly as the byte.
      when 0..0xFF
        array_enc << num
      else
        # Numeric entity (&#nnnn;); technique shared by Stefan Scholl.
        array_enc.concat(format('&#%d;', num).unpack('C*'))
      end
    end
    array_enc.pack('C*')
  end
  49      
  # Convert from ISO-8859-15 (Latin-9) to UTF-8.
  #
  # +str+ is read one byte at a time with unpack('C*'). The eight byte values
  # that differ from ISO-8859-1 are replaced by their code points; every
  # other byte value is used as the code point unchanged.
  #
  # Returns the string produced by packing the code points with pack('U*').
  def from_iso_8859_15(str)
    # Byte values that differ compared to ISO-8859-1 => code point.
    differing = {
      0xA4 => 0x20AC,
      0xA6 => 0x0160,
      0xA8 => 0x0161,
      0xB4 => 0x017D,
      0xB8 => 0x017E,
      0xBC => 0x0152,
      0xBD => 0x0153,
      0xBE => 0x0178
    }
    code_points = str.unpack('C*').map { |byte| differing.fetch(byte, byte) }
    code_points.pack('U*')
  end
  71    end
  72  end