tabula is built on PDFBox, while camelot is built on pdfminer.
So if the text conversion fails, the CSV extraction fails as well.
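Since camelot reads the same text layer through pdfminer, a quick sanity check is to try extracting text with pdfminer first. Below is a minimal sketch under that assumption; the file name report.pdf is just a placeholder.

from pdfminer.high_level import extract_text
import camelot

path = "report.pdf"  # hypothetical input file

# If pdfminer cannot pull any text out of the PDF, camelot will not find tables either.
text = extract_text(path)
if not text.strip():
    print("No text layer found; OCR is needed before camelot can extract tables.")
else:
    tables = camelot.read_pdf(path, pages="1")
    print(tables)  # e.g. <TableList n=1>
    if tables.n > 0:
        tables[0].to_csv("table0.csv")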
Using learning-to-rank on layout coordinate information, I added a reading-order assignment feature (reading-order sort) to NDLOCR. https://t.co/3h7OXaHCjS
— Toru Aoike (@blue0620) May 2, 2022
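For illustration only (this is not the NDLOCR implementation itself), a learning-to-rank model over layout coordinates could be sketched with LightGBM's LGBMRanker; the bounding-box features, labels, and per-page groups below are made up.

import numpy as np
from lightgbm import LGBMRanker

# Hypothetical training data: one row per text block (xmin, ymin, xmax, ymax),
# label = how early the block appears in the gold reading order (higher = earlier).
X_train = np.array([
    [0.10, 0.05, 0.90, 0.10],  # page 1, read 1st
    [0.10, 0.15, 0.45, 0.60],  # page 1, read 2nd
    [0.55, 0.15, 0.90, 0.60],  # page 1, read 3rd
    [0.10, 0.05, 0.90, 0.10],  # page 2, read 1st
    [0.10, 0.15, 0.90, 0.60],  # page 2, read 2nd
])
y_train = np.array([2, 1, 0, 1, 0])
group = [3, 2]  # number of blocks per page

ranker = LGBMRanker(objective="lambdarank", n_estimators=50, min_child_samples=1)
ranker.fit(X_train, y_train, group=group)

# At inference time, sort a page's blocks by predicted score (descending).
X_page = np.array([[0.55, 0.20, 0.90, 0.60], [0.10, 0.05, 0.90, 0.10]])
order = np.argsort(-ranker.predict(X_page))
print(order)  # block indices from earliest to latest predicted reading order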
import pandas as pd

year = "2022"


def make_cal(df0, fn):
    # Pivot the days into a 16-row grid: columns are (month, first/second half),
    # values are the day number and the 出/休 flag
    df1 = df0.pivot(
        index=["remainder"], columns=["month", "quotient"], values=["day", "value"]
    )
    df1.columns = df1.columns.reorder_levels(order=[1, 2, 0])
    df1.sort_index(axis=1, inplace=True)

    # Insert blank separator columns after every three months
    df1.insert(12 + 0, "セパ1", pd.NA)
    df1.insert(24 + 1, "セパ2", pd.NA)
    df1.insert(36 + 2, "セパ3", pd.NA)
    df1.insert(48 + 3, "セパ4", pd.NA)

    df1.to_csv(fn, encoding="utf_8_sig", index=False, header=False)


# National holidays (Cabinet Office CSV)
holidays = pd.read_csv(
    "https://www8.cao.go.jp/chosei/shukujitsu/syukujitsu.csv",
    encoding="cp932",
    index_col=0,
    parse_dates=True,
    header=None,
    names=["date", "holiday"],
    skiprows=1,
)

# All 365 days of the year
df_dates = pd.date_range(f"{year}-01-01", f"{year}-12-31", freq="D")

df0 = pd.DataFrame(index=df_dates).join(holidays)

df0["month"] = df0.index.month
df0["day"] = df0.index.day
df0["weekday"] = df0.index.weekday
df0["week"] = (df0["day"] - 1) // 7
df0["quotient"] = (df0["day"] - 1) // 16
df0["remainder"] = (df0["day"] - 1) % 16
df0["value"] = "出"  # 出 = work day, 休 = day off

df1 = df0.copy()

# Sundays off only
df1["value"].mask(df1["weekday"] == 6, "休", inplace=True)
make_cal(df1, f"{year}_SUNx.csv")

# Plus 2nd and 4th Saturdays off
df1["value"].mask(
    (df1["weekday"] == 5) & ((df1["week"] == 1) | (df1["week"] == 3)), "休", inplace=True
)
make_cal(df1, f"{year}_SUNxSAT24.csv")

# Plus national holidays off
df1["value"].mask(df1["holiday"].notna(), "休", inplace=True)
make_cal(df1, f"{year}_SUNxSAT24HOLx.csv")

# Plus every Saturday off
df1["value"].mask(df1["weekday"] > 4, "休", inplace=True)
make_cal(df1, f"{year}_SUNxSATxHOLx.csv")
I got NDL OCR running on Google Colaboratory. Anyone should be able to try it for free... https://t.co/PQFVA1Zriz
— Toru Aoike (@blue0620) April 27, 2022
from folium_vector import VectorGridProtobuf
import folium

map = folium.Map(tiles=None, location=[33.84167, 132.76611], zoom_start=14)

# Standard Google Maps tiles
folium.raster_layers.TileLayer(
    "https://{s}.google.com/vt/lyrs=m&x={x}&y={y}&z={z}",
    subdomains=["mt0", "mt1", "mt2", "mt3"],
    name="Google Map",
    attr="<a href='https://developers.google.com/maps/documentation' target='_blank'>Google Map</a>",
).add_to(map)

options = {
    "vectorTileLayerStyles": {
        "rakuten": {
            "fill": True,
            "weight": 0,
            "fillColor": "orange",
            "fillOpacity": 0.4,
        },
    }
}

vc = VectorGridProtobuf(
    "https://area.uqcom.jp/api2/rakuten/{z}/{x}/{y}.mvt", "auローミング", options
)
map.add_child(vc)

map.save("map.html")
!pip install git+https://github.com/iwpnd/folium-vector.git
import folium
from folium_vector import VectorGridProtobuf

url = "https://area.uqcom.jp/api2/rakuten/{z}/{x}/{y}.mvt"

m = folium.Map()

options = {
    "vectorTileLayerStyles": {
        "rakuten": {
            "fill": True,
            "weight": 0,
            "fillColor": "orange",
            "fillOpacity": 0.4,
        },
    }
}

vc = VectorGridProtobuf(url, "folium_layer_name", options)
m.add_child(vc)

m  # display the map inline in a notebook

m.save("map.html")
import folium
import geopandas as gpd
import pandas as pd
from folium.plugins import Search

url = "https://docs.google.com/spreadsheets/d/e/2PACX-1vRE1NoYtNw1FmjRQ8wcdPkcE0Ryeoc2mfFkCQPHjzwL5CpwNKkLXnBl_F7c0LZjrtbLtRLH55ZVi6gQ/pub?gid=0&single=true&output=csv"

# Load the spreadsheet, keep only the needed columns, and drop empty rows
df = (
    pd.read_csv(
        url, index_col=0, usecols=[0, 1, 2, 3, 7, 8, 9, 10, 11, 12, 13, 14], dtype=str
    )
    .dropna(how="all")
    .fillna("")
)

# Build point geometries from the 経度 (longitude) / 緯度 (latitude) columns
gdf = gpd.GeoDataFrame(
    df, geometry=gpd.points_from_xy(df["経度"], df["緯度"]), crs="EPSG:6668"
)
gdf

map = folium.Map(
    location=[33.84167, 132.76611],
    tiles="https://cyberjapandata.gsi.go.jp/xyz/pale/{z}/{x}/{y}.png",
    attr='© <a href="https://maps.gsi.go.jp/development/ichiran.html">国土地理院</a>',
    zoom_start=10,
)

towergeo = folium.GeoJson(gdf, name="Tower").add_to(map)

# Search plugin: search the points by the 場所 (place name) column
towersearch = Search(
    layer=towergeo,
    geom_type="Point",
    placeholder="場所検索",
    search_label="場所",
    collapsed=True,
).add_to(map)

map.save("map.html")

map