CSV

import pandas as pd

pd.options.display.float_format = '{:,.2f}'.format
pd.set_option('display.width', 85)
pd.set_option('display.max_columns', 8)

landtemps = pd.read_csv('landtempssample.csv',
        names=['stationid', 'year', 'month', 'avgtemp',
            'latitude', 'longitude', 'elevation', 'station',
            'countryid', 'country'],
            skiprows=1,
            parse_dates=[['month', 'year']],
            low_memory=False)
type(landtemps)

pandas.core.frame.DataFrame

skiprows=1 -- skips the first row (since these are the headers);
parse_dates= converts the month and year columns to a date value.

landtemps.shape, landtemps.dtypes

((100000, 9),
month_year    datetime64[ns]
stationid             object
avgtemp              float64
latitude             float64
longitude            float64
elevation            float64
station               object
countryid             object
country               object
dtype: object)

landtemps.head(7)

	month_year	stationid	avgtemp	latitude	...	elevation	station	countryid	country
0	2000-04-01	USS0010K01S	5.27	39.90	...	2,773.70	INDIAN_CANYON	US	United States
1	1940-05-01	CI000085406	18.04	-18.35	...	58.00	ARICA	CI	Chile
2	2013-12-01	USC00036376	6.22	34.37	...	61.00	SAINT_CHARLES	US	United States
3	1963-02-01	ASN00024002	22.93	-34.28	...	65.50	BERRI_IRRIGATION	AS	Australia
4	2001-11-01	ASN00028007	NaN	-14.78	...	79.40	MUSGRAVE	AS	Australia
5	1991-04-01	USW00024151	5.59	42.15	...	1,362.50	MALAD_CITY	US	United States
6	1993-12-01	RSM00022641	-10.17	63.90	...	13.00	ONEGA	RS	Russia

7 rows × 9 columns

landtemps.describe()

	avgtemp	latitude	longitude	elevation
count	85,554.00	100,000.00	100,000.00	100,000.00
mean	10.92	35.08	-38.12	565.56
std	11.52	23.55	84.30	1,073.65
min	-70.70	-90.00	-179.98	-350.00
25%	3.46	33.19	-101.66	64.00
50%	12.22	40.79	-79.57	240.80
75%	19.57	47.42	17.53	644.70
max	39.95	82.52	179.75	9,999.00

landtemps.rename(columns={'month_year':'measuredate'}, inplace=True)
landtemps.dtypes

measuredate    datetime64[ns]
stationid              object
avgtemp               float64
latitude              float64
longitude             float64
elevation             float64
station                object
countryid              object
country                object
dtype: object

landtemps.avgtemp.describe()

count   85,554.00
mean        10.92
std         11.52
min        -70.70
25%          3.46
50%         12.22
75%         19.57
max         39.95
Name: avgtemp, dtype: float64

Below, we use isnull() chained with sum() to find and count each column's missing values:

landtemps.isnull().sum()

measuredate        0
stationid          0
avgtemp        14446
latitude           0
longitude          0
elevation          0
station            0
countryid          0
country            5
dtype: int64

This removes rows in avgtemp that are NULL:

landtemps.dropna(subset=['avgtemp'], inplace=True)
landtemps.shape

1	`(85554, 9)`