{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# EPA data file: `Sampling Frequency` counts per `State Code`\n",
    "\n",
    "This notebook reads the pipe-delimited file `RD_501_88101_2012-0.txt`, drops its first and last rows (which are not data records), keeps a fixed subset of columns, and counts the non-null `Sampling Frequency` entries for each `State Code`.\n",
    "\n",
    "Every cell derives a new, separately named frame from the previous one, so any cell can be re-run without changing the result."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Input file; the relative path is resolved against the kernel's working directory.\n",
    "DATA_FILE = '../data/epa/RD_501_88101_2012-0.txt'\n",
    "\n",
    "# Columns kept for the analysis, in the order they should be displayed.\n",
    "RD_COLUMNS = [\n",
    "    '# RD', 'Action Code', 'State Code', 'County Code', 'Site ID',\n",
    "    'Parameter', 'POC', 'Sample Duration', 'Unit', 'Method', 'Date',\n",
    "    'Start Time', 'Sample Value', 'Null Data Code', 'Sampling Frequency',\n",
    "    'Monitor Protocol (MP) ID',\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The raw frame is kept untouched under its own name so later cells can\n",
    "# always be recomputed from it.\n",
    "# NOTE(review): the file contains non-data rows (see the next section); if\n",
    "# pandas emits a DtypeWarning here, consider low_memory=False or an explicit\n",
    "# dtype= -- not changed in this revision.\n",
    "records_raw = pd.read_csv(DATA_FILE, sep='|')\n",
    "records_raw.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Drop the non-data rows"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The first row starts with \"# RC\" and the last row holds the number of\n",
    "# entries, so both are removed with a single positional slice.\n",
    "# Slicing from records_raw (instead of rebinding one name to a slice of\n",
    "# itself) makes this cell idempotent: re-running it never trims extra rows.\n",
    "records = records_raw.iloc[1:-1]\n",
    "records.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "records.columns  # list all columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Select the columns of interest"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# reindex (rather than records[RD_COLUMNS]) is used on purpose: a name in\n",
    "# RD_COLUMNS that is absent from the file produces an all-NaN column instead\n",
    "# of raising KeyError.\n",
    "records_selected = records.reindex(columns=RD_COLUMNS)\n",
    "records_selected.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Count `Sampling Frequency` entries per `State Code`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# aggfunc='count' counts the non-null 'Sampling Frequency' values in each\n",
    "# 'State Code' group.\n",
    "frequency_counts_by_state = records_selected.pivot_table(\n",
    "    values='Sampling Frequency',\n",
    "    index=['State Code'],\n",
    "    aggfunc='count',\n",
    ")\n",
    "frequency_counts_by_state"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}