PDFの行ずれをグループで結合

qiita.com

!pip install camelot
!pip install pikepdf
import camelot
import pandas as pd
import pikepdf

# 保護解除
with pikepdf.open("data.pdf") as pdf:
    pdf.save("output.pdf")

# strip_textで空白改行を除去
tables = camelot.read_pdf("output.pdf", flavor="stream", strip_text=" .\n")

# タイトル
title = "".join(tables[0].data[0])

# テーブル数確認
print(tables.n)

# 複数の場合はtables[0]の数字を増やす、tables[1]、tables[2]
df0 = pd.DataFrame(tables[0].data[1:])

df0.to_csv("result.csv", encoding="utf_8_sig")

# Optional post-processing below

# Subtitle: concatenate every cell of the first column into one string
subtitle = df0[0].str.cat(sep="")

# Drop the first column, keep the remaining data columns
df1 = df0.iloc[:, 1:]

# Group id for merging wrapped PDF rows: a new group starts on a row whose
# PREVIOUS row had no empty cell, so a row containing blanks ends up in the
# same group as the row(s) below it
df1["grp"] = (~(df1 == "").any(axis=1).shift(1).fillna(False)).cumsum()

df1

# Join the cells of each group back into single logical rows
df2 = df1.groupby("grp").agg("".join)

df2.to_csv("result.csv", encoding="utf_8_sig")

raspberry pi 2022-02-19

f:id:imabari_ehime:20220219102920p:plain f:id:imabari_ehime:20220219102932p:plain f:id:imabari_ehime:20220219102942p:plain f:id:imabari_ehime:20220219102950p:plain f:id:imabari_ehime:20220219102959p:plain f:id:imabari_ehime:20220219103009p:plain f:id:imabari_ehime:20220219103018p:plain

# Create an empty file to enable SSH at boot
# NOTE(review): the bare "ssh" line is shorthand for the file name on the
# boot partition (e.g. `touch /boot/ssh`), not a command to run — confirm.
ssh

sudo raspi-config

# mDNS so the Pi is reachable as <hostname>.local
sudo apt install avahi-daemon
sudo systemctl enable avahi-daemon
sudo systemctl start avahi-daemon

# Add a new user
sudo adduser imabari

# Grant the new user sudo rights
sudo gpasswd -a imabari sudo

# Reboot
sudo reboot


# Remove the default "pi" account
sudo gpasswd -d pi sudo
sudo userdel -r pi

# Time sync: point systemd-timesyncd at the NICT NTP server
sudo nano /etc/systemd/timesyncd.conf
NTP=ntp.nict.jp

sudo systemctl daemon-reload
sudo systemctl restart systemd-timesyncd

# Switch the APT repository to a Japanese mirror
sudo nano /etc/apt/sources.list

# Before
deb http://raspbian.raspberrypi.org/raspbian/ bullseye main contrib non-free rpi

# After
deb http://ftp.jaist.ac.jp/raspbian/ bullseye main contrib non-free rpi

# Disable swap
sudo swapoff --all
sudo systemctl stop dphys-swapfile
sudo systemctl disable dphys-swapfile
systemctl status dphys-swapfile

free -h

sudo apt install python3-bs4 python3-requests python3-html5lib python3-lxml python3-requests-oauthlib python3-tweepy

平均身長の推移をplotlyでグラフ化

oku.edu.mie-u.ac.jp

練習でplotlyでグラフ化してみた

# Plot average-height trends using plotly as the pandas plotting backend
import pandas as pd

pd.options.plotting.backend = "plotly"

# Height statistics by age; first CSV column becomes the index
heights = pd.read_csv(
    "https://oku.edu.mie-u.ac.jp/~okumura/python/data/height.csv",
    index_col=0,
)

# Line chart with point markers for boys aged 15-17
chart = heights[["男17歳", "男16歳", "男15歳"]].plot(markers=True)

chart.update_layout(width=1000, height=600)

# Embeddable HTML fragment (plotly.js loaded from CDN) — paste where needed
chart.write_html("view.html", include_plotlyjs="cdn", full_html=False)

楽天モバイルのエリア状況をバックアップ

# スクレイピング

import datetime

import requests
from bs4 import BeautifulSoup

# Desktop IE11 user agent so the site serves its regular desktop page
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
}


def fetch_soup(url, parser="html.parser"):
    """Fetch *url* with the module-level headers and return the parsed soup.

    Raises requests.HTTPError when the response status is an error.
    """
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    return BeautifulSoup(response.content, parser)


url = "https://network.mobile.rakuten.co.jp/area/"

soup = fetch_soup(url)

# Last-updated text shown above the coverage map, e.g. "2022年2月19日更新"
text = soup.select_one("div.area-Top_Map > p").get_text(strip=True)

# Parse the Japanese date string into a date object
dt_update = datetime.datetime.strptime(text, "%Y年%m月%d日更新").date()

# Compact YYYYMMDD form used later in output filenames
update = dt_update.strftime("%Y%m%d")

print(update)

# Map rendering

from staticmap import StaticMap

# Rakuten coverage tile URLs: 2-month (4g2m) and 4-month (4g4m) plan layers
rakuten2m = "https://gateway-api.global.rakuten.com/dsd/geoserver/4g2m/mno_coverage_map/gwc/service/gmaps?LAYERS=mno_coverage_map:all_map&FORMAT=image/png&TRANSPARENT=TRUE&x={x}&y={y}&zoom={z}"
rakuten4m = "https://gateway-api.global.rakuten.com/dsd/geoserver/4g4m/mno_coverage_map/gwc/service/gmaps?LAYERS=mno_coverage_map:all_map&FORMAT=image/png&TRANSPARENT=TRUE&x={x}&y={y}&zoom={z}"

# Centers of the four areas to capture
cities = [
    {"name": "東予", "lat": 34.024779, "lng": 133.183823},
    {"name": "中予", "lat": 33.673497, "lng": 132.702484},
    {"name": "南予1", "lat": 33.493021, "lng": 132.521553},
    {"name": "南予2", "lat": 33.170318, "lng": 132.524815},
]

width, height = 3000, 2000
zoom = 12

for city in cities:

    # i = 1 -> 2-month layer ("m2" suffix), i = 2 -> 4-month layer ("m4")
    for i, url in enumerate([rakuten2m, rakuten4m], 1):

        smap = StaticMap(width, height, url_template=url)

        # StaticMap takes the center as [longitude, latitude]
        img = smap.render(zoom=zoom, center=[city["lng"], city["lat"]])
        img.save(f'{update}_{city["name"]}_m{i*2}.png')

そらまめくんのデータをplotlyで表示

soramame.env.go.jp

import pathlib

import pandas as pd

# Use plotly as the pandas plotting backend
pd.options.plotting.backend = "plotly"

# 7-day hourly air-quality readings for station 38201030 (Soramame-kun)
df = pd.read_csv(
    "https://soramame.env.go.jp/data/sokutei/NoudoTime/38201030/7day.csv",
    dtype={
        "SO2": float,
        "NO": float,
        "NO2": float,
        "NOX": float,
        "SPM": float,
        "PM2.5": float,
        "WD": object,
        "WS": float,
    },
    na_values=["-", "  "],  # the feed marks missing readings with "-" or blanks
)

# Build a datetime index from the separate year/month/day/hour columns
df.index = pd.to_datetime(
    {"year": df["年"], "month": df["月"], "day": df["日"], "hour": df["時"]}
)

df.drop(["年", "月", "日", "時"], axis=1, inplace=True)

fig = df[["SO2", "NO", "NO2", "NOX"]].plot()

# view

fig.show()
# html

fig.write_html("plotly.html", include_plotlyjs="cdn", full_html=True)

# BUG FIX: the original wrapped this alternative snippet in full-width
# quotation marks (U+201D), which are not valid Python syntax; a real
# triple-quoted string keeps it inert while remaining parseable.
"""
p = pathlib.Path("plotly.html")

with p.open(mode="w") as f:
    f.write(fig.to_html(include_plotlyjs="cdn"))
"""
# png

# !pip install -U kaleido

import plotly.io as pio

# Default static-image export settings for kaleido
pio.kaleido.scope.default_format = "png"

pio.kaleido.scope.default_width = 1400
pio.kaleido.scope.default_height = 1000

fig.write_image("table.png")

楽天エリアマップ2カ月以降のエリア色変更

import cv2
import numpy as np
from google.colab.patches import cv2_imshow
from PIL import Image
from staticmap import StaticMap


def get_map(
    url, lat=33.84167, lng=132.76611, width=2000, height=2000, zoom=12, fn="map.png"
):
    """Render one static map image from the tile template *url* and save it to *fn*.

    StaticMap expects the center as [longitude, latitude].
    """
    renderer = StaticMap(width, height, url_template=url)
    renderer.render(zoom=zoom, center=[lng, lat]).save(fn)


def bgr_mask(img, bgr):
    """Return a binary mask that is 255 where *img* pixels exactly equal *bgr*."""
    color = np.array(bgr)
    # identical lower and upper bounds -> exact-color match
    return cv2.inRange(img, color, color)


# Map center and zoom for the comparison maps
lat, lng, zoom = 34.0663192, 132.9975244, 14

# GSI "pale" base map plus Rakuten coverage tiles (2-month and 4-month plans)
kokudo = "https://cyberjapandata.gsi.go.jp/xyz/pale/{z}/{x}/{y}.png"
rakuten2m = "https://gateway-api.global.rakuten.com/dsd/geoserver/4g2m/mno_coverage_map/gwc/service/gmaps?LAYERS=mno_coverage_map:all_map&FORMAT=image/png&TRANSPARENT=TRUE&x={x}&y={y}&zoom={z}"
rakuten4m = "https://gateway-api.global.rakuten.com/dsd/geoserver/4g4m/mno_coverage_map/gwc/service/gmaps?LAYERS=mno_coverage_map:all_map&FORMAT=image/png&TRANSPARENT=TRUE&x={x}&y={y}&zoom={z}"

# Render the base map and both coverage maps at the same center/zoom
get_map(kokudo, lat=lat, lng=lng, zoom=zoom, width=2000, height=2000, fn="map.png")

get_map(
    rakuten2m, lat=lat, lng=lng, zoom=zoom, width=2000, height=2000, fn="area2m.png"
)
get_map(
    rakuten4m, lat=lat, lng=lng, zoom=zoom, width=2000, height=2000, fn="area4m.png"
)

srcmap = cv2.imread("map.png")

src2m = cv2.imread("area2m.png")
src4m = cv2.imread("area4m.png")

"""
# エリア
[186, 102, 255]

# 拡大予定エリア
[221, 128, 196]

# パートナー
[215, 166, 255]
"""

# Masks of the "planned expansion" color in each coverage map (BGR)
mask2m = bgr_mask(src2m, [221, 128, 196])
mask4m = bgr_mask(src4m, [221, 128, 196])

# Current-coverage and partner(au)-area masks from the 2-month map
masknow = bgr_mask(src2m, [186, 102, 255])
maskau = bgr_mask(src2m, [215, 166, 255])

# Union of the current area and the 4-month expansion area
mask = cv2.bitwise_or(masknow, mask4m)

# Keep only those areas (this drops the partner area)
area_now = cv2.bitwise_and(src4m, src4m, mask=mask)

# Outside the mask: turn black background into white
area_now[mask == 0] = [255, 255, 255]

# Expansion that appears only in the 4-month map (XOR of the two masks)
mask4mnew = cv2.bitwise_xor(mask2m, mask4m)

# Recolor only the 4-month-only expansion (dark red in BGR order)
area_now[mask4mnew == 255] = [0, 0, 128]

cv2_imshow(area_now)

# Overlay variant 1: bitwise AND with the base map
area = cv2.bitwise_and(srcmap, area_now)
cv2.imwrite("cv2and.png", area)

cv2_imshow(area)

# Overlay variant 2: weighted blend (30% base map, 70% coverage)
area = cv2.addWeighted(srcmap, 0.3, area_now, 0.7, 0)
cv2.imwrite("cv2add.png", area)

cv2_imshow(area)

# Overlay variant 3: alpha compositing via Pillow
area = cv2.cvtColor(area_now, cv2.COLOR_BGR2BGRA)

# Fully transparent outside the coverage mask, alpha 160 (~63% opaque) inside
area[:, :, 3] = np.where(mask == 0, 0, 160)

cv2_imshow(area)

cv2.imwrite("dst.png", area)

im_map = Image.open("map.png")

im_area = Image.open("dst.png")

# Give the base map an alpha channel so alpha_composite accepts both images
im_map.putalpha(255)

# im_area.putalpha(160)

im_raku = Image.alpha_composite(im_map, im_area)

im_raku.save("rakuten.png")