pandas

qiita.com

news.mynavi.jp

amalog.hateblo.jp

pandas: powerful Python data analysis toolkit — pandas 0.23.0 documentation

www.atmarkit.co.jp

www.atmarkit.co.jp

www.atmarkit.co.jp

# 型確認
df.dtypes

# 欠損値のカウント
df.isnull().sum()

# 欠損値確認
df[df.isnull().any(axis=1)]



# 文字除去(文字列の両端から指定した文字を取り除く)
df['xxx'].str.strip('yyy')

# 型変換
df['xxx'].astype(np.int64)

# 抽出
df1 = df.iloc[:, 3:5]

# 置換

## 条件がTrueの時はそのまま、Falseの時はNaN
df['xxx'].where(df['xxx'] == 'hoge')

## 条件がTrueの時はNaN、Falseの時はそのまま
df['xxx'].mask(df['xxx'] == 'hoge')


# 元のdfを直接書き換える場合は、メソッドの引数に以下を指定
inplace=True

# datetimeからtimeを分離(1行目: indexがDatetimeIndexの場合、2行目: 'datetime'列から取り出す場合)
df['time'] = df.index.time.tolist()
df['time'] = df['datetime'].dt.time.tolist()

#ピボットテーブル
pv = df.pivot_table(values = 'xxx', index = 'yyy', columns='zzz', aggfunc = sum, fill_value = 0)

# 順位
df['xxx'].rank(ascending=False, method='min')

# ソート
df.sort_values(['xxx', 'yyy', 'zzz'], ascending=[True,True, True], inplace=True)
dfs = pd.read_html(url, header=0, index_col=0, match='xxxxx')

pandas.read_html(
    io,
    match='.+',
    flavor=None,
    header=None,
    index_col=None,
    skiprows=None,
    attrs=None,
    parse_dates=False,
    tupleize_cols=None,
    thousands=',',
    encoding=None,
    decimal='.',
    converters=None,
    na_values=None,
    keep_default_na=True)

http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_html.html

df = pd.read_csv(url, encoding='shift_jis', skiprows=1, header=0, index_col=0)

pandas.read_csv(
    filepath_or_buffer,
    sep=',',
    delimiter=None,
    header='infer',
    names=None,
    index_col=None,
    usecols=None,
    squeeze=False,
    prefix=None,
    mangle_dupe_cols=True,
    dtype=None,
    engine=None,
    converters=None,
    true_values=None,
    false_values=None,
    skipinitialspace=False,
    skiprows=None,
    nrows=None,
    na_values=None,
    keep_default_na=True,
    na_filter=True,
    verbose=False,
    skip_blank_lines=True,
    parse_dates=False,
    infer_datetime_format=False,
    keep_date_col=False,
    date_parser=None,
    dayfirst=False,
    iterator=False,
    chunksize=None,
    compression='infer',
    thousands=None,
    decimal=b'.',
    lineterminator=None,
    quotechar='"',
    quoting=0,
    escapechar=None,
    comment=None,
    encoding=None,
    dialect=None,
    tupleize_cols=None,
    error_bad_lines=True,
    warn_bad_lines=True,
    skipfooter=0,
    skip_footer=0,
    doublequote=True,
    delim_whitespace=False,
    as_recarray=None,
    compact_ints=None,
    use_unsigned=None,
    low_memory=True,
    buffer_lines=None,
    memory_map=False,
    float_precision=None)

http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html