Unicode 转 UTF-8

学习了一下大佬关于 UTF-8 编码的知识,自己写了一个把 Unicode 码值转化为 UTF-8 编码的函数。

感觉自己写的是不带分号的 C++,完全没用什么厉害的 Python 语法。还被 Python 没有 ++ 给坑了。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# bit_cnt[k] is the largest number of code-point bits that a k-byte UTF-8
# sequence can carry (k = 1..4). Index 0 is a placeholder so that the index
# equals the byte count.
bit_cnt = [0, 7, 11, 16, 21]


def to_utf8(n):
    """Return the UTF-8 encoding of code point ``n`` as a flat list of bits.

    The result is a list of 0/1 integers, most significant bit first,
    whose length is 8, 16, 24 or 32 depending on how many bytes the code
    point needs.

    Args:
        n: Integer code point, expected in the range 0..0x10FFFF.

    Returns:
        The list of bits, or ``-1`` (after printing ``"Out of range"``)
        when ``n`` is negative or greater than 0x10FFFF.

    Note:
        Only the numeric range is checked; values in the surrogate range
        (0xD800..0xDFFF) are encoded like any other 3-byte value.
    """
    # Negative values are rejected as well: shifting a negative int right
    # never reaches zero, so a bit-extraction loop would not terminate.
    if n < 0 or n > 0x10ffff:
        print("Out of range")
        return -1

    # Pick the shortest sequence whose payload can hold every significant bit.
    width = n.bit_length()
    byte_count = next(k for k in range(1, 5) if width <= bit_cnt[k])

    # Payload bits, most significant first, zero-padded to the full capacity
    # of the chosen sequence length.
    capacity = bit_cnt[byte_count]
    payload = [(n >> shift) & 1 for shift in range(capacity - 1, -1, -1)]

    # Leading byte: "0" for a single byte, otherwise byte_count ones followed
    # by a zero; the remainder of the byte is filled with payload bits.
    prefix = [0] if byte_count == 1 else [1] * byte_count + [0]
    lead_bits = 8 - len(prefix)
    res = prefix + payload[:lead_bits]

    # Each continuation byte is the marker "10" followed by six payload bits.
    for start in range(lead_bits, capacity, 6):
        res += [1, 0] + payload[start:start + 6]

    return res

# Demo: encode one sample code point for each UTF-8 sequence length
# (1, 2, 3 and 4 bytes) and print the resulting bit lists.
for sample in (0b10001, 0b100000001, 0b1000100101111111, 0b11111011000000010):
    print(to_utf8(sample))