Changeset 1594
- Timestamp: 07/24/05 22:46:33 (3 years ago)
- Files:
  - trunk/lib/xsd/charset.rb (modified) (9 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/lib/xsd/charset.rb
r1552 r1594 1 1 # XSD4R - Charset handling library. 2 # Copyright (C) 2001, 2003 NAKAMURA, Hiroshi <nahi@ruby-lang.org>.2 # Copyright (C) 2001, 2003, 2005 NAKAMURA, Hiroshi <nahi@ruby-lang.org>. 3 3 4 4 # This program is copyrighted free software by NAKAMURA, Hiroshi. You can … … 11 11 12 12 module Charset 13 @ encoding = $KCODE13 @internal_encoding = $KCODE 14 14 15 15 class XSDError < StandardError; end … … 27 27 begin 28 28 require 'xsd/iconvcharset' 29 @encoding = 'UTF8' 30 sjtag = (/(mswin|bccwin|mingw|cygwin|emx)/ =~ RUBY_PLATFORM) ? 'cp932' : 'shift_jis' 31 EncodingConvertMap[['UTF8', 'EUC' ]] = Proc.new { |str| IconvCharset.safe_iconv("euc-jp", "utf-8", str) } 32 EncodingConvertMap[['EUC' , 'UTF8']] = Proc.new { |str| IconvCharset.safe_iconv("utf-8", "euc-jp", str) } 33 EncodingConvertMap[['EUC' , 'SJIS']] = Proc.new { |str| IconvCharset.safe_iconv(sjtag, "euc-jp", str) } 34 EncodingConvertMap[['UTF8', 'SJIS']] = Proc.new { |str| IconvCharset.safe_iconv(sjtag, "utf-8", str) } 35 EncodingConvertMap[['SJIS', 'UTF8']] = Proc.new { |str| IconvCharset.safe_iconv("utf-8", sjtag, str) } 36 EncodingConvertMap[['SJIS', 'EUC' ]] = Proc.new { |str| IconvCharset.safe_iconv("euc-jp", sjtag, str) } 29 @internal_encoding = 'UTF8' 30 sjtag = (/(mswin|bccwin|mingw|cygwin|emx)/ =~ RUBY_PLATFORM) ? 
'cp932' : 31 'shift_jis' 32 EncodingConvertMap[['UTF8', 'EUC' ]] = 33 Proc.new { |str| IconvCharset.safe_iconv("euc-jp", "utf-8", str) } 34 EncodingConvertMap[['EUC' , 'UTF8']] = 35 Proc.new { |str| IconvCharset.safe_iconv("utf-8", "euc-jp", str) } 36 EncodingConvertMap[['EUC' , 'SJIS']] = 37 Proc.new { |str| IconvCharset.safe_iconv(sjtag, "euc-jp", str) } 38 EncodingConvertMap[['UTF8', 'SJIS']] = 39 Proc.new { |str| IconvCharset.safe_iconv(sjtag, "utf-8", str) } 40 EncodingConvertMap[['SJIS', 'UTF8']] = 41 Proc.new { |str| IconvCharset.safe_iconv("utf-8", sjtag, str) } 42 EncodingConvertMap[['SJIS', 'EUC' ]] = 43 Proc.new { |str| IconvCharset.safe_iconv("euc-jp", sjtag, str) } 37 44 rescue LoadError 38 45 begin 39 46 require 'nkf' 40 EncodingConvertMap[['EUC' , 'SJIS']] = Proc.new { |str| NKF.nkf('-sXm0', str) } 41 EncodingConvertMap[['SJIS', 'EUC' ]] = Proc.new { |str| NKF.nkf('-eXm0', str) } 47 EncodingConvertMap[['EUC' , 'SJIS']] = 48 Proc.new { |str| NKF.nkf('-sXm0', str) } 49 EncodingConvertMap[['SJIS', 'EUC' ]] = 50 Proc.new { |str| NKF.nkf('-eXm0', str) } 42 51 rescue LoadError 43 52 end … … 45 54 begin 46 55 require 'uconv' 47 @ encoding = 'UTF8'56 @internal_encoding = 'UTF8' 48 57 EncodingConvertMap[['UTF8', 'EUC' ]] = Uconv.method(:u8toeuc) 49 58 EncodingConvertMap[['UTF8', 'SJIS']] = Uconv.method(:u8tosjis) … … 68 77 # 69 78 def Charset.encoding 70 @ encoding79 @internal_encoding 71 80 end 72 81 73 82 def Charset.encoding=(encoding) 74 83 warn("xsd charset is set to #{encoding}") if $DEBUG 75 @ encoding = encoding84 @internal_encoding = encoding 76 85 end 77 86 78 87 def Charset.encoding_label 79 charset_label(@ encoding)88 charset_label(@internal_encoding) 80 89 end 81 90 82 91 def Charset.encoding_to_xml(str, charset) 83 encoding_conv(str, @ encoding, charset_str(charset))92 encoding_conv(str, @internal_encoding, charset_str(charset)) 84 93 end 85 94 86 95 def Charset.encoding_from_xml(str, charset) 87 encoding_conv(str, charset_str(charset), @ 
encoding)96 encoding_conv(str, charset_str(charset), @internal_encoding) 88 97 end 89 98 … … 95 104 else 96 105 raise CharsetConversionError.new( 97 "Converter not found: #{ enc_from } -> #{ enc_to}")106 "Converter not found: #{enc_from} -> #{enc_to}") 98 107 end 99 108 end … … 113 122 # us_ascii = '[\x00-\x7F]' 114 123 us_ascii = '[\x9\xa\xd\x20-\x7F]' # XML 1.0 restricted. 115 USASCIIRegexp = Regexp.new("\\A#{ us_ascii}*\\z", nil, "NONE")124 USASCIIRegexp = Regexp.new("\\A#{us_ascii}*\\z", nil, "NONE") 116 125 117 126 twobytes_euc = '(?:[\x8E\xA1-\xFE][\xA1-\xFE])' 118 127 threebytes_euc = '(?:\x8F[\xA1-\xFE][\xA1-\xFE])' 119 character_euc = "(?:#{ us_ascii }|#{ twobytes_euc }|#{ threebytes_euc})"120 EUCRegexp = Regexp.new("\\A#{ character_euc}*\\z", nil, "NONE")128 character_euc = "(?:#{us_ascii}|#{twobytes_euc}|#{threebytes_euc})" 129 EUCRegexp = Regexp.new("\\A#{character_euc}*\\z", nil, "NONE") 121 130 122 131 # onebyte_sjis = '[\x00-\x7F\xA1-\xDF]' 123 132 onebyte_sjis = '[\x9\xa\xd\x20-\x7F\xA1-\xDF]' # XML 1.0 restricted. 
124 133 twobytes_sjis = '(?:[\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC])' 125 character_sjis = "(?:#{ onebyte_sjis }|#{ twobytes_sjis})"126 SJISRegexp = Regexp.new("\\A#{ character_sjis}*\\z", nil, "NONE")134 character_sjis = "(?:#{onebyte_sjis}|#{twobytes_sjis})" 135 SJISRegexp = Regexp.new("\\A#{character_sjis}*\\z", nil, "NONE") 127 136 128 137 # 0xxxxxxx … … 133 142 # 11110uuu 10uuuzzz 10yyyyyy 10xxxxxx 134 143 fourbytes_utf8 = '(?:[\xF0-\xF7][\x80-\xBF][\x80-\xBF][\x80-\xBF])' 135 character_utf8 = "(?:#{ us_ascii }|#{ twobytes_utf8 }|#{ threebytes_utf8 }|#{ fourbytes_utf8 })" 136 UTF8Regexp = Regexp.new("\\A#{ character_utf8 }*\\z", nil, "NONE") 144 character_utf8 = 145 "(?:#{us_ascii}|#{twobytes_utf8}|#{threebytes_utf8}|#{fourbytes_utf8})" 146 UTF8Regexp = Regexp.new("\\A#{character_utf8}*\\z", nil, "NONE") 137 147 138 148 def Charset.is_us_ascii(str) … … 163 173 is_sjis(str) 164 174 else 165 raise UnknownCharsetError.new("Unknown charset: #{ code}")175 raise UnknownCharsetError.new("Unknown charset: #{code}") 166 176 end 167 177 end