1 require 'iconv'
2
module Redmine
  # Character-set conversion helpers that never raise on undecodable data:
  # bytes that cannot be converted are replaced with '?'.
  #
  # Every method picks one of three strategies at run time:
  # * strings that respond to +force_encoding+ use String#encode;
  # * otherwise, when RUBY_PLATFORM is 'java', Iconv is used and any failure
  #   falls back to replacing everything outside printable ASCII;
  # * otherwise Iconv is used, replacing illegal sequences one byte at a time.
  #
  # Note: on encoding-aware strings the argument's encoding tag is changed in
  # place (via +force_encoding+) unless the argument is frozen, in which case
  # a copy is used instead.
  module CodesetUtil
    # Returns +str+ tagged as UTF-8 with every invalid byte sequence replaced
    # by '?'. Valid characters, including non-ASCII ones, are preserved.
    #
    # str - the String to clean; nil is returned unchanged.
    def self.replace_invalid_utf8(str)
      return str if str.nil?
      return iconv_with_replacement('UTF-8', 'UTF-8', str) unless str.respond_to?(:force_encoding)

      str = str.dup if str.frozen?
      str.force_encoding('UTF-8')
      str = reencode_replacing_invalid(str) unless str.valid_encoding?
      str
    end

    # Converts +str+ from +encoding+ to UTF-8, replacing anything that cannot
    # be converted with '?'.
    #
    # str      - the String to convert; nil is returned unchanged.
    # encoding - name of the source encoding; blank means UTF-8.
    def self.to_utf8(str, encoding)
      return str if str.nil?

      unless str.respond_to?(:force_encoding)
        return str if str.empty?
        source = encoding.blank? ? 'UTF-8' : encoding
        return iconv_with_replacement('UTF-8', source, str)
      end

      str = str.dup if str.frozen?
      str.force_encoding('ASCII-8BIT')
      return str.force_encoding('UTF-8') if str.empty?

      source = encoding.blank? ? 'UTF-8' : encoding
      if source.upcase == 'UTF-8'
        # Already supposed to be UTF-8: only repair the invalid sequences.
        replace_invalid_utf8(str)
      else
        str.force_encoding(source)
        str.encode('UTF-8', :invalid => :replace,
                   :undef => :replace, :replace => '?')
      end
    end

    # Converts +str+ to UTF-8 using the encodings listed in
    # Setting.repositories_encodings and tags the result as UTF-8.
    #
    # str - the String to convert; nil is returned unchanged.
    def self.to_utf8_by_setting(str)
      return str if str.nil?

      str = to_utf8_by_setting_internal(str)
      str.force_encoding('UTF-8') if str.respond_to?(:force_encoding)
      str
    end

    # Does the work for to_utf8_by_setting: tries each encoding listed in
    # Setting.repositories_encodings (comma separated) in order and returns
    # the first successful Iconv conversion. When none succeeds, invalid UTF-8
    # sequences are replaced with '?'.
    #
    # Empty strings and strings made only of tab, CR, LF and printable ASCII
    # are returned without consulting the setting.
    def self.to_utf8_by_setting_internal(str)
      return str if str.nil?

      encoding_aware = str.respond_to?(:force_encoding)
      str = str.dup if encoding_aware && str.frozen?
      str.force_encoding('ASCII-8BIT') if encoding_aware
      return str if str.empty?
      return str if /\A[\r\n\t\x20-\x7e]*\Z/n.match(str) # for us-ascii

      str.force_encoding('UTF-8') if encoding_aware
      candidates = Setting.repositories_encodings.split(',').collect(&:strip)
      candidates.each do |candidate|
        begin
          return Iconv.conv('UTF-8', candidate, str)
        rescue Iconv::Failure
          # do nothing here and try the next encoding
        end
      end

      str = replace_invalid_utf8(str)
      str.force_encoding('UTF-8') if encoding_aware
      str
    end

    # Converts the UTF-8 string +str+ to +encoding+, replacing anything that
    # cannot be converted with '?'.
    #
    # str      - the UTF-8 String to convert; nil is treated as ''.
    # encoding - name of the target encoding; blank means UTF-8, matching
    #            to_utf8.
    def self.from_utf8(str, encoding)
      str ||= ''
      target = encoding.blank? ? 'UTF-8' : encoding
      return iconv_with_replacement(target, 'UTF-8', str) unless str.respond_to?(:force_encoding)

      str = str.dup if str.frozen?
      str.force_encoding('UTF-8')
      if target.upcase == 'UTF-8'
        replace_invalid_utf8(str)
      else
        str.encode(target, :invalid => :replace,
                   :undef => :replace, :replace => '?')
      end
    end

    # Rebuilds a UTF-8 tagged string with invalid sequences replaced by '?'.
    # The round trip goes through UTF-16LE rather than US-ASCII so that valid
    # non-ASCII characters survive instead of being turned into '?' as well.
    def self.reencode_replacing_invalid(str)
      str.encode('UTF-16LE', :invalid => :replace,
                 :undef => :replace, :replace => '?').encode('UTF-8')
    end
    private_class_method :reencode_replacing_invalid

    # Iconv based conversion for strings without encoding support.
    #
    # to   - name of the target encoding.
    # from - name of the source encoding.
    # str  - the String to convert.
    def self.iconv_with_replacement(to, from, str)
      if RUBY_PLATFORM == 'java'
        begin
          return Iconv.new(to, from).iconv(str)
        rescue
          # Any failure: keep tab, CR, LF and printable ASCII only.
          return str.gsub(%r{[^\r\n\t\x20-\x7e]}, '?')
        end
      end

      converter = Iconv.new(to, from)
      converted = String.new
      begin
        converted << converter.iconv(str)
      rescue Iconv::IllegalSequence => e
        # Keep what was converted, swap the offending byte for '?' and
        # resume with the remainder. Each retry consumes at least one byte.
        converted << e.success
        str = '?' + e.failed[1, e.failed.length]
        retry
      rescue => e
        converted << e.success
      end
      converted
    end
    private_class_method :iconv_with_replacement
  end
end