1 # -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
module REXML
  # Mixin that tracks the text encoding of the including object and installs
  # the matching encoding support on it. It also acts as a small registry:
  # encoding definitions (kept in files that are not part of this one)
  # register an installer block under their name, and #encoding= looks the
  # block up and runs it against the receiver.
  module Encoding
    # Registry: encoding name => installer block.
    @encoding_methods = {}

    # Stores +block+ as the installer for the encoding named +enc+.
    # Registering the same name again replaces the previous block.
    def self.register(enc, &block)
      @encoding_methods[enc] = block
    end

    # Runs the installer registered under +enc+, passing it +obj+, and
    # returns whatever the installer returns.
    # Raises NoMethodError when nothing is registered under +enc+.
    def self.apply(obj, enc)
      @encoding_methods[enc][obj]
    end

    # Returns the installer block registered under +enc+, or nil if none.
    def self.encoding_method(enc)
      @encoding_methods[enc]
    end

    # Native, default format is UTF-8, so it is declared here rather than in
    # an encodings/ definition.
    UTF_8 = 'UTF-8'
    UTF_16 = 'UTF-16'
    UNILE = 'UNILE'

    # Name of the encoding currently in effect (upper-cased), or nil when
    # #encoding= has not been called yet.
    attr_reader :encoding

    # Switches the receiver to the encoding named +enc+ (case-insensitive).
    # nil and "UTF-8" both select UTF-8.
    #
    # For any other name the generic ICONV definition is tried first; if it
    # cannot be loaded or applied, rexml/encodings/<NAME>.rb is tried.
    #
    # Returns false when +enc+ is already the current encoding, true
    # otherwise (only observable through send; plain assignment syntax
    # always evaluates to the right-hand side).
    #
    # Raises ArgumentError when the name contains anything other than word
    # characters and '-', or when no decoder can be loaded for it.
    def encoding=(enc)
      old_verbosity = $VERBOSE
      # Silence warnings emitted while loading encoding definitions.
      $VERBOSE = false
      enc = enc.nil? ? nil : enc.upcase
      return false if defined?(@encoding) && enc == @encoding

      if enc && enc != UTF_8
        # The name is interpolated into a require path below, so the whole
        # string must be validated (\A..\z): ^..$ would accept a harmless
        # first line followed by "\n../../something". Validate before
        # storing so a rejected name never ends up in @encoding.
        raise ArgumentError, "Bad encoding name #{enc}" unless enc =~ /\A[\w-]+\z/
        @encoding = enc
        # String#untaint was removed in Ruby 3.2; call it only where it exists.
        @encoding.untaint if @encoding.respond_to?(:untaint)
        begin
          require 'rexml/encodings/ICONV.rb'
          Encoding.apply(self, "ICONV")
        rescue LoadError, StandardError
          # LoadError is not a StandardError, hence both are listed. Signals
          # and exit requests are deliberately no longer swallowed here.
          begin
            require File.join("rexml", "encodings", "#{@encoding}.rb")
            Encoding.apply(self, @encoding)
          rescue LoadError => err
            # Diagnostics go to stderr so they cannot corrupt program output.
            $stderr.puts err.message
            raise ArgumentError, "No decoder found for encoding #{@encoding}. Please install iconv."
          end
        end
      else
        @encoding = UTF_8
        require 'rexml/encodings/UTF-8.rb'
        Encoding.apply(self, @encoding)
      end
      true
    ensure
      $VERBOSE = old_verbosity
    end

    # Guesses the encoding of the XML text in +str+.
    #
    # * Leading bytes FE FF: the two bytes are removed from +str+ in place
    #   and UTF_16 is returned.
    # * Leading bytes FF FE (LSB-first UTF-16): the two bytes are removed
    #   from +str+ in place and UNILE is returned.
    # * Otherwise the upper-cased value of the encoding pseudo-attribute of
    #   an XML declaration at the start of +str+ (leading whitespace
    #   allowed) is returned, or UTF_8 when there is none.
    def check_encoding(str)
      # Compare raw bytes: on Ruby 1.9+ str[0] is a one-character String and
      # never equals an Integer.
      bom_encoding =
        case [str.getbyte(0), str.getbyte(1)]
        when [0xfe, 0xff] then UTF_16
        when [0xff, 0xfe] then UNILE
        end

      if bom_encoding
        # Strip exactly two bytes regardless of how the string is tagged,
        # then restore the caller's encoding tag.
        tagged_as = str.encoding
        str.force_encoding(::Encoding::BINARY)
        str.slice!(0, 2)
        str.force_encoding(tagged_as)
        return bom_encoding
      end

      # \A instead of ^: only a declaration at the very start counts, not
      # one that merely begins a later line. No /u flag, so the pattern also
      # works on binary-tagged strings that contain high bytes.
      declaration = /\A\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m.match(str)
      declaration ? declaration[3].upcase : UTF_8
    end
  end
end