# github.com
# jakevdp.github.io
import pandas as pd
# Read the first sheet of the workbook; header=None keeps every row as data
# and gives the columns integer labels.
df1 = pd.read_excel("URL", header=None, sheet_name=0)
# Save the raw frame as tab-separated text.
df1.to_csv("data.tsv", sep="\t")
# Count the non-null cells: first per column, then per row.
df1.notna().sum(axis="index")
df1.notna().sum(axis="columns")
# Overwrite one cell by integer position (行 = row position, 列 = column
# position; both are placeholders to be filled in).
df1.iloc[行, 列] = "テキスト"
# Keep only the rows and the columns that hold more than two non-null cells.
# .copy() makes df2 an independent frame, so the in-place fills below never
# touch df1.
df2 = df1.loc[df1.notnull().sum(axis=1) > 2, df1.notnull().sum() > 2].copy()
# Forward-fill gaps downwards (each column), then left-to-right (each row).
# DataFrame.ffill replaces fillna(method="ffill"), whose `method` keyword is
# deprecated since pandas 2.1.
df2.ffill(inplace=True)
df2.ffill(axis=1, inplace=True)
# Recode values through a lookup dict: "a" -> 0, "b" -> 1.
# Assuming s is a pandas Series, values missing from the dict become NaN.
s.map({"a": 0, "b": 1})
# Quick inspection helpers (assuming df is a pandas DataFrame):
# print a summary of the index, columns, dtypes and non-null counts.
df.info()
# (number of rows, number of columns)
df.shape
# Number of non-null values in each column.
df.count()
# Data type of each column.
df.dtypes
# Summary statistics for the columns.
df.describe()
# 欠損処理 (handling missing values)
# Turn empty strings into missing values (NaN), modifying df in place.
df.mask(df.eq(""), inplace=True)
# Do the same for cells that consist of a single space.
# NOTE(review): both list entries render as the same plain space; presumably
# one of them was meant to be a full-width space (U+3000) -- confirm.
df.mask(df.isin([" ", " "]), inplace=True)
# Parse a column as numbers; values that cannot be parsed become NaN.
# The converted Series is returned, not written back ("列名" = column name).
pd.to_numeric(df["列名"], errors="coerce")
# 削除 (deleting rows and columns)
# Remove one row by its index label ("行名" = row label placeholder).
df2.drop("行名", inplace=True)
# Remove one column by its label ("列名" = column name placeholder).
df2.drop("列名", axis=1, inplace=True)
# Keep only the rows that have at least 5 non-null values.
df.dropna(thresh=5, inplace=True)
# Drop the rows whose value in the given column is missing.
df.dropna(subset=["列名"], inplace=True)
# Drop the rows in which every value is missing (returns a new frame).
df.dropna(how="all")
# Drop rows that duplicate an earlier row (returns a new frame).
# The method is drop_duplicates(); drop_duplicated() does not exist and
# raises AttributeError.
df.drop_duplicates()
# columns設定 (setting the column labels)
# Promote the first row to the header: take its values as labels and apply
# them to the remaining rows (returns a new frame).
first_row_labels = df.iloc[0].to_list()
df.iloc[1:].set_axis(first_row_labels, axis=1)
# Two-row header: concatenate the first two rows column by column into a
# single label per column. str.join requires every cell to be a string.
header_rows = df.head(2).values
columns = ["".join(parts) for parts in zip(*header_rows)]
df.iloc[2:].set_axis(columns, axis=1)
# Flatten the column labels by joining the parts of each label.
# Presumably for MultiIndex columns whose labels are tuples of strings --
# confirm against the frame being used.
df.columns.map(lambda label: "".join(label))