python 字符编码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29



# Author:Finyaci
import chardet
# In Python 3, str is Unicode text by default; UTF-8 is one of the encodings of Unicode.
import sys

# Demo: converting text between character sets always goes through str
# (Unicode). Each byte string is printed together with chardet's guess of
# its encoding.

# Report the interpreter's default string encoding first.
print("sysdefaultencoding: ", sys.getdefaultencoding())
print("------------------------")

str1 = "埃塞克斯大风"

# str -> bytes: encode the same text with two different codecs.
utf8_ec = str1.encode("utf-8")
gbk_ec = str1.encode("gbk")

# bytes -> str -> bytes: decode with the source codec, then encode with
# the target codec.
text_from_gbk = gbk_ec.decode("gbk")
gbk_to_utf8 = text_from_gbk.encode("utf-8")

text_from_utf8 = utf8_ec.decode("utf-8")
utf8_to_gbk = text_from_utf8.encode("gbk")

# Directly encoded byte strings.
for label, payload in (("utf8_ec: ", utf8_ec), ("gbk_ec: ", gbk_ec)):
    print(label, payload, chardet.detect(payload))
print("------------------------")

# Byte strings obtained by converting from the other codec.
for label, payload in (
    ("gbk_to_utf8", gbk_to_utf8),
    ("utf8_to_gbk", utf8_to_gbk),
):
    print(label, payload, chardet.detect(payload))

# Conclusion:
#   xxx     --> unicode : decode()
#   unicode --> xxx     : encode()
# Every conversion between two different character sets goes through Unicode.