Visual Bag of Words with fashion_mnist

  • Extract keypoints from the images.
  • Cluster the descriptors of the extracted keypoints.
  • Treat each keypoint's cluster as a "visual word" and build a count vector for each image.
  • Train a classification model on the count vectors.

Dataset used: Fashion-MNIST

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.datasets import fashion_mnist

(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
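
Fashion-MNIST contains 60,000 training and 10,000 test grayscale images of size 28x28, each labeled with one of ten clothing classes. The class-name list below follows the standard Fashion-MNIST label convention and is added here only for reference; it is not part of the original notebook.

# Standard Fashion-MNIST class names, indexed by label 0-9 (added for reference)
class_names = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
               "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]

print(x_train.shape, x_test.shape)   # (60000, 28, 28) (10000, 28, 28)
print(class_names[y_train[0]])       # class name of the first training image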

Extracting SIFT keypoints

sift = cv2.xfeatures2d.SIFT_create(100)   # keep at most 100 keypoints per image
keypoints = []
descriptors = []
for img in x_test:
    kps, des = sift.detectAndCompute(img, None)
    keypoints.append(kps)
    descriptors.append(des)
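
Some of these small, low-contrast images yield no keypoints at all; for those, detectAndCompute returns None as the descriptor array, which is why the DataFrame is cleaned with dropna() further below. A quick check (a sketch, not part of the original code):

# Count test images for which SIFT found no keypoints (descriptor is None)
n_empty = sum(1 for des in descriptors if des is None)
print("images without descriptors:", n_empty, "out of", len(descriptors))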
df = pd.DataFrame()
df["kps"] = keypoints
df["des"] = descriptors
df["label"] = y_test
df.tail()
kps des label
9995 [<KeyPoint 0x7fdfce4031e0>, <KeyPoint 0x7fdfce... [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 43.0... 9
9996 [<KeyPoint 0x7fdfce4032d0>, <KeyPoint 0x7fdfce... [[2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16.0... 1
9997 [<KeyPoint 0x7fdfce403390>, <KeyPoint 0x7fdfce... [[19.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 92.... 8
9998 [<KeyPoint 0x7fdfce403450>, <KeyPoint 0x7fdfce... [[32.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 191... 1
9999 [<KeyPoint 0x7fdfce403510>, <KeyPoint 0x7fdfce... [[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0,... 5

Keypoints found in an image and their descriptors (each SIFT descriptor is a 128-dimensional vector)

plt.figure(figsize=(15, 9))
plt.subplot(1, 2, 1)
plt.imshow(x_test[3], cmap='gray')
pts_x = [kp.pt[0] for kp in df.kps[3]]
pts_y = [kp.pt[1] for kp in df.kps[3]]
plt.scatter(pts_x[:4], pts_y[:4], s=200, c='r')
plt.axis('off')

des = df.des[3]
for i in range(4):
    plt.subplot(4, 2, (i + 1) * 2)
    plt.bar(np.arange(len(des[i])), des[i])
    plt.xticks(range(4, len(des[i]), 8))
    plt.title("Descriptor of keypoint {} of x_test[3]".format(i))

plt.tight_layout()
plt.show()

plt.figure(figsize=(15, 9))
plt.subplot(1, 2, 1)
plt.imshow(x_test[10], cmap='gray')
pts_x = [kp.pt[0] for kp in df.kps[10]]
pts_y = [kp.pt[1] for kp in df.kps[10]]
plt.scatter(pts_x[:4], pts_y[:4], s=200, c='r')
plt.axis('off')

des = df.des[10]
for i in range(4):
    plt.subplot(4, 2, (i + 1) * 2)
    plt.bar(np.arange(len(des[i])), des[i])
    plt.xticks(range(0, len(des[i]), 32))
    plt.title("Descriptor of keypoint {} of x_test[10]".format(i))

plt.tight_layout()
plt.show()

[Figure: x_test[3] with its first four keypoints marked, and the descriptors of those keypoints]

[Figure: x_test[10] with its first four keypoints marked, and the descriptors of those keypoints]


# Drop images for which SIFT found no keypoints (their descriptors are None)
df = df.dropna()
df = df.reset_index(drop=True)
X = []
for des in df.des:
    for d in des:
        X.append(d)
X = np.asarray(X)
from sklearn.cluster import KMeans
model_clu = KMeans(n_clusters=8, init="k-means++").fit(X)

clu = model_clu.predict(X)
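
KMeans here builds the visual vocabulary: each of the 8 cluster centers is one "visual word". The vocabulary size is a free parameter; a rough way to probe other choices is to compare the KMeans inertia for a few values of k on a subsample (a sketch, not part of the original notebook; runtimes grow quickly with k):

# Compare clustering tightness (inertia) for a few candidate vocabulary sizes
for k in (8, 16, 32, 64):
    km = KMeans(n_clusters=k, init="k-means++", n_init=10).fit(X[:20000])
    print("k =", k, "inertia =", km.inertia_)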

Visualizing the clustering result

from sklearn.manifold import TSNE

X_embedded = TSNE().fit_transform(X[:1000])
plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=clu[:1000], cmap="tab20")
plt.title("클러스터링 결과 시각화")
plt.show()

[Figure: t-SNE embedding of the first 1,000 descriptors, colored by cluster]
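
t-SNE only gives a qualitative picture; as a quantitative complement, the silhouette score on the same subsample indicates how well separated the clusters are (a sketch, not part of the original notebook):

from sklearn.metrics import silhouette_score

# Silhouette score on the first 1,000 descriptors (-1: poor, +1: well separated)
print(silhouette_score(X[:1000], clu[:1000]))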

Building count vectors from the descriptor clusters

def vbow_vectorizer(des):
    clu_nums = model_clu.predict(des.reshape(-1, 128))
    return np.bincount(clu_nums, minlength=20)
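
The key step is np.bincount with minlength: it turns the list of per-keypoint cluster ids into a fixed-length histogram (padded to length 20 here, even though only 8 clusters are actually used). A minimal illustration with made-up cluster ids:

# Made-up cluster ids for five keypoints of one image
example_ids = np.array([2, 5, 5, 7, 2])
print(np.bincount(example_ids, minlength=20))
# -> [0 0 2 0 0 2 0 1 0 0 0 0 0 0 0 0 0 0 0 0]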

Converting the keypoint descriptors of each image into a cluster count vector

X = []
for des in df.des:
    X.append(vbow_vectorizer(des))
y = df.label
X[:5]
[array([0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 2, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]

Building the classification model

from sklearn.naive_bayes import MultinomialNB

model_cls = MultinomialNB().fit(np.asarray(X), y)

Classifier performance

from sklearn.metrics import classification_report

y_true = y
y_pred = model_cls.predict(X)
print(classification_report(y_true, y_pred))
             precision    recall  f1-score   support

          0       0.26      0.44      0.33       944
          1       0.32      0.41      0.36       628
          2       0.22      0.04      0.07       954
          3       0.22      0.27      0.24       923
          4       0.21      0.13      0.16       973
          5       0.39      0.56      0.46       966
          6       0.19      0.09      0.12       932
          7       0.32      0.38      0.35       965
          8       0.32      0.25      0.28       958
          9       0.46      0.59      0.52       987

avg / total       0.29      0.31      0.29      9230
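
Note that this report is computed on the same vectors the classifier was fitted on, so it is an optimistic training-set score (and the KMeans vocabulary was also built from the same descriptors). A fairer estimate would hold out part of the data, for example (a sketch, assuming the X and y built above):

from sklearn.model_selection import train_test_split

# Hold out 20% of the count vectors for evaluation
X_tr, X_te, y_tr, y_te = train_test_split(np.asarray(X), y, test_size=0.2, random_state=0)
nb_holdout = MultinomialNB().fit(X_tr, y_tr)
print(classification_report(y_te, nb_holdout.predict(X_te)))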
def test(img):
    kps, des = sift.detectAndCompute(img, None)
    vbow = vbow_vectorizer(des)
    return model_cls.predict(vbow.reshape(-1, 20))
print(test(x_train[11]))
plt.imshow(x_train[11], cmap="gray")
plt.axis('off')
plt.show()
[5]

[Figure: x_train[11], the image classified above]

from sklearn.metrics.pairwise import cosine_similarity

def check_similarity(img1, img2):
    kps1, des1 = sift.detectAndCompute(img1, None)
    kps2, des2 = sift.detectAndCompute(img2, None)
    vbow1 = vbow_vectorizer(des1)
    vbow2 = vbow_vectorizer(des2)
    return cosine_similarity(vbow1.reshape(-1, 20), vbow2.reshape(-1, 20))
plt.figure()
print("두 이미지 간의 유사도 = {}".format(check_similarity(x_train[11], x_train[1])))
plt.subplot(1,2,1)
plt.imshow(x_train[11], cmap="gray")
plt.axis('off')
plt.subplot(1,2,2)
plt.imshow(x_train[1], cmap="gray")
plt.axis('off')
plt.tight_layout()
plt.show()
Similarity between the two images = [[0.65659915]]

[Figure: the two compared images, x_train[11] and x_train[1]]
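
The same count vectors also support simple image retrieval: compare a query vector against all vectors built above and return the closest matches (a sketch; the query index 0 is arbitrary):

# Find the five images most similar to df.des[0] by cosine similarity
# of their visual-word count vectors
X_arr = np.asarray(X)
query = vbow_vectorizer(df.des[0]).reshape(1, -1)
sims = cosine_similarity(query, X_arr)[0]
top5 = np.argsort(sims)[::-1][:5]
print(list(zip(top5, sims[top5])))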