# References:
#   https://ensekitt.hatenablog.com/entry/2018/07/09/200000
#   https://zenn.dev/kazuhito/articles/1dc73eeb7e1297
!pip install pdfplumber
"""再起動"""
!apt update
!wget "https://www.city.kumamoto.jp/common/UploadFileDsp.aspx?c_id=5&id=4645&sub_id=18&flid=239935" -O data.pdf
!apt install libmagickwand-dev ghostscript
# Commented out IPython magic to ensure Python compatibility.
# %%writefile /etc/ImageMagick-6/policy.xml
# <?xml version="1.0" encoding="UTF-8"?>
# <!DOCTYPE policymap>
#
# <policymap>
#
# <policy domain="resource" name="memory" value="256MiB"/>
# <policy domain="resource" name="map" value="512MiB"/>
# <policy domain="resource" name="width" value="16KP"/>
# <policy domain="resource" name="height" value="16KP"/>
# <policy domain="resource" name="area" value="128MB"/>
# <policy domain="resource" name="disk" value="1GiB"/>
#
# <policy domain="delegate" rights="none" pattern="URL"/>
# <policy domain="delegate" rights="none" pattern="HTTPS"/>
# <policy domain="delegate" rights="none" pattern="HTTP"/>
#
# <policy domain="path" rights="none" pattern="@*"/>
# <policy domain="cache" name="shared-secret" value="passphrase" stealth="true"/>
#
# <policy domain="coder" rights="none" pattern="PS"/>
# <policy domain="coder" rights="none" pattern="PS2"/>
# <policy domain="coder" rights="none" pattern="PS3"/>
# <policy domain="coder" rights="none" pattern="EPS"/>
# <policy domain="coder" rights="read|write" pattern="PDF" />
# <policy domain="coder" rights="none" pattern="XPS"/>
# </policymap>
import pdfplumber
import pandas as pd
# Render one cropped region of the PDF's second page to "pdf.png" so the
# OpenCV steps further down can load it as an ordinary image.

# Candidate crop regions on the page. pdfplumber bounding boxes are given as
# (x0, top, x1, bottom).
bboxs = [
    [49, 172, 418, 509],
    [455, 172, 824, 509],
    [49, 712, 418, 1049],
    [455, 712, 824, 1049],
]

# Index into ``bboxs`` selecting which region is cropped.
n = 1
bbox = bboxs[n]

# The context manager closes the PDF file handle as soon as the crop has been
# rendered and written; previously the handle stayed open indefinitely.
with pdfplumber.open("data.pdf") as pdf:
    page = pdf.pages[1]  # second page (0-based index)
    crop = page.within_bbox(bbox)
    im = crop.to_image(resolution=300)
    im.save("pdf.png", format="PNG")

# Bare expression: displays the rendered crop when run as a notebook cell.
im
"""# 画像認識"""
import cv2
import numpy as np
from google.colab.patches import cv2_imshow
from matplotlib import pyplot as plt
# Load the rendered crop and cut it into a rows x cols grid of tiles.
img = cv2.imread("pdf.png")
if img is None:
    # cv2.imread signals failure by returning None instead of raising; stop
    # here with a clear message rather than failing later inside NumPy.
    raise OSError("cv2.imread could not read or decode 'pdf.png'")
cv2_imshow(img)

rows = 5
cols = 6
# Number of pixels trimmed from every edge of each tile.
# NOTE(review): presumably this removes the grid lines between cells - confirm.
margin = 5

# Split into `rows` horizontal bands, then each band into `cols` tiles, so the
# resulting list is ordered row by row, left to right.
chunks = [
    tile[margin:-margin, margin:-margin, :]
    for band in np.array_split(img, rows, axis=0)
    for tile in np.array_split(band, cols, axis=1)
]
print(len(chunks))
from pathlib import Path
# Write every tile to output/chunk_NN.png, where NN is the zero-padded
# position of the tile in `chunks`.
output_dir = Path("output")
output_dir.mkdir(exist_ok=True)
for index, tile in enumerate(chunks):
    file_name = f"chunk_{index:02d}.png"
    cv2.imwrite(str(output_dir / file_name), tile)
!zip -r output.zip output
"""# 画像確認"""
# Select two tiles by their index in `chunks` and prepare a colour and a
# grayscale version of each for the comparisons below.
day1 = 6
day2 = 24

# The first 90 pixel columns of each tile are dropped.
# NOTE(review): presumably this cuts off a label area - confirm.
tile1 = chunks[day1]
tile1.shape  # bare expression: shown when run as a notebook cell
img1 = tile1[:, 90:]
img1g = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
cv2_imshow(img1)

tile2 = chunks[day2]
tile2.shape  # bare expression: shown when run as a notebook cell
img2 = tile2[:, 90:]
img2g = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
cv2_imshow(img2)
"""# ヒストグラム比較
+ https://ensekitt.hatenablog.com/entry/2018/07/09/200000
"""
# Compare the two tiles by the correlation of their channel-0 histograms
# (256 bins over the value range [0, 256)).
# NOTE(review): img1/img2 are the colour crops, so channel 0 is the first
# colour plane (blue if the data is BGR as loaded by cv2.imread), while the
# grayscale versions img1g/img2g are never used - confirm which was intended.
hist_params = ([0], None, [256], [0, 256])
hist_g_1, hist_g_2 = (
    cv2.calcHist([image], *hist_params) for image in (img1, img2)
)
comp_hist = cv2.compareHist(hist_g_1, hist_g_2, cv2.HISTCMP_CORREL)
comp_hist  # bare expression: shown when run as a notebook cell
def img2hist(img):
    """Plot and return the per-channel histograms of an image.

    A 256-bin histogram over the value range [0, 256) is computed with
    ``cv2.calcHist`` for each of channels 0, 1 and 2. The three curves are
    drawn with matplotlib in blue, green and red respectively, and the figure
    is shown before returning.

    Args:
        img: Image array handed to ``cv2.calcHist``. Channels 0-2 are read,
            so it needs at least three channels (presumably BGR, as loaded
            by OpenCV - confirm against the caller).

    Returns:
        A list of three histogram arrays, in channel order 0, 1, 2.
    """
    channel_colors = ("b", "g", "r")
    histrgb = [
        cv2.calcHist([img], [channel], None, [256], [0, 256])
        for channel in range(len(channel_colors))
    ]
    for hist, line_color in zip(histrgb, channel_colors):
        plt.plot(hist, color=line_color)
    plt.xlim([0, 256])
    plt.show()
    return histrgb
# Build the three-channel histograms of both tiles, flatten each set into a
# single column vector, and compare the two vectors by correlation.
histrgb_1 = img2hist(img1)
histrgb_2 = img2hist(img2)

# Each histogram from cv2.calcHist is a column of 256 bins, so stacking the
# three channels and reshaping to one column concatenates them end to end.
histarray = np.array(histrgb_1)
histvec_1 = histarray.reshape(-1, 1)
histvec_1.shape  # bare expression: shown when run as a notebook cell

histarray = np.array(histrgb_2)
histvec_2 = histarray.reshape(-1, 1)
histvec_2.shape  # bare expression: shown when run as a notebook cell

# cv2.HISTCMP_CORREL is comparison method 0.
print(cv2.compareHist(histvec_1, histvec_2, cv2.HISTCMP_CORREL))
"""# akaze"""
# Detect AKAZE keypoints in both tiles, match their descriptors, keep only the
# matches that pass a ratio test, and draw the surviving matches.
akaze = cv2.AKAZE_create()
kp1, des1 = akaze.detectAndCompute(img1, None)
kp2, des2 = akaze.detectAndCompute(img2, None)

# AKAZE's default descriptors are binary, so they are matched with the Hamming
# norm; BFMatcher() without arguments would fall back to NORM_L2.
bf = cv2.BFMatcher(cv2.NORM_HAMMING)

# detectAndCompute yields no descriptors when an image has no keypoints; in
# that case there is nothing to match.
if des1 is None or des2 is None:
    matches = []
else:
    matches = bf.knnMatch(des1, des2, k=2)

# Ratio test: keep a match only when its best candidate is clearly closer
# than the second-best one.
ratio = 0.5
good = []
for candidates in matches:
    # knnMatch may return fewer than k candidates for a descriptor.
    if len(candidates) < 2:
        continue
    best, second = candidates
    if best.distance < ratio * second.distance:
        good.append([best])

# flags=2 suppresses drawing of keypoints that have no match.
img_3 = cv2.drawMatchesKnn(img1, kp1, img2, kp2, good, None, flags=2)
cv2_imshow(img_3)
len(good)  # bare expression: shown when run as a notebook cell
"""# 類似度
https://zenn.dev/kazuhito/articles/1dc73eeb7e1297
"""
# Image-hash algorithms offered by cv2.img_hash, keyed by name. The factories
# are stored uncalled so that only the selected algorithm is instantiated.
hash_factories = {
    "AverageHash": cv2.img_hash.AverageHash_create,
    "BlockMeanHash": cv2.img_hash.BlockMeanHash_create,
    "ColorMomentHash": cv2.img_hash.ColorMomentHash_create,
    "MarrHildrethHash": cv2.img_hash.MarrHildrethHash_create,
    "PHash": cv2.img_hash.PHash_create,
    "RadialVarianceHash": cv2.img_hash.RadialVarianceHash_create,
}

# Change this key to try another algorithm. "RadialVarianceHash" is the one
# that was in effect before (the last of six successive reassignments).
hash_name = "RadialVarianceHash"
hash_func = hash_factories[hash_name]()
"""## 比較"""
# Hash both tiles with the selected algorithm and compare the two hashes.
# NOTE(review): how to read the returned number (distance vs. similarity)
# depends on the hash algorithm - check the cv2.img_hash documentation.
hash1, hash2 = (hash_func.compute(image) for image in (img1, img2))
result = hash_func.compare(hash1, hash2)
result  # bare expression: shown when run as a notebook cell