1 #
2 # This class was contributed by Mikko Tiihonen mikko DOT tiihonen AT hut DOT fi
3 #
4 module REXML
5 module Encoding
# Registers a handler block under the encoding name "ISO-8859-15".
# Inside the block the generic names `encode` and `decode` are aliased to
# the Latin-9 converters to_iso_8859_15 and from_iso_8859_15.
# NOTE(review): `register` is defined outside this file, and the block
# parameter `o` is never referenced here; which module or object ends up
# receiving the aliases depends on how `register` invokes the block --
# confirm against its definition.
register("ISO-8859-15") do |o|
  alias encode to_iso_8859_15
  alias decode from_iso_8859_15
end
10
# Convert from UTF-8 to ISO-8859-15 (Latin-9).
#
# content:: a UTF-8 encoded String.
#
# Returns a byte String (built with pack('C*')). Every code point that has
# a Latin-9 byte is emitted as that byte; every other code point is written
# as a decimal numeric character reference (&#nnnn;).
def to_iso_8859_15(content)
  array_enc = []
  content.unpack('U*').each do |num|
    case num
    # Characters added compared to ISO-8859-1: each takes over the byte of
    # one of the characters that Latin-9 dropped.
    when 0x20AC then array_enc << 0xA4 # UTF-8: 0xe2 0x82 0xac
    when 0x0160 then array_enc << 0xA6 # UTF-8: 0xc5 0xa0
    when 0x0161 then array_enc << 0xA8 # UTF-8: 0xc5 0xa1
    when 0x017D then array_enc << 0xB4 # UTF-8: 0xc5 0xbd
    when 0x017E then array_enc << 0xB8 # UTF-8: 0xc5 0xbe
    when 0x0152 then array_enc << 0xBC # UTF-8: 0xc5 0x92
    when 0x0153 then array_enc << 0xBD # UTF-8: 0xc5 0x93
    when 0x0178 then array_enc << 0xBE # UTF-8: 0xc5 0xb8
    # Characters removed compared to ISO-8859-1: their byte values now mean
    # something else, so they must be written as numeric entities. The
    # entity is appended as individual bytes because pack('C*') below only
    # accepts Integers; appending a String here would raise TypeError.
    when 0xA4, 0xA6, 0xA8, 0xB4, 0xB8, 0xBC, 0xBD, 0xBE
      array_enc.concat "&\##{num};".unpack('C*')
    # All remaining single-byte code points are identical in both charsets.
    when 0..0xFF
      array_enc << num
    else
      # Numeric entity (&#nnnn;); shared by Stefan Scholl
      array_enc.concat "&\##{num};".unpack('C*')
    end
  end
  array_enc.pack('C*')
end
49
# Convert from ISO-8859-15 (Latin-9) to UTF-8.
#
# str:: a String whose bytes are Latin-9 encoded.
#
# Returns the String produced by packing the resulting code points with
# pack('U*'). Eight byte values map to different code points than in
# ISO-8859-1; every other byte is used as its own code point.
def from_iso_8859_15(str)
  # Latin-9 byte => Unicode code point, for the positions that differ
  # from ISO-8859-1.
  latin9_overrides = {
    0xA4 => 0x20AC,
    0xA6 => 0x0160,
    0xA8 => 0x0161,
    0xB4 => 0x017D,
    0xB8 => 0x017E,
    0xBC => 0x0152,
    0xBD => 0x0153,
    0xBE => 0x0178
  }
  code_points = str.unpack('C*').map do |byte|
    latin9_overrides.fetch(byte, byte)
  end
  code_points.pack('U*')
end
71 end
72 end