I am currently studying the book "Hands-On Machine Learning with Scikit-Learn, Keras and TensorFlow". I tried running the following example, without success however. The link is working, pandas is installed correctly, os, tarfile and urllib are system packages. Still, I get the error message below (tried Jupyter & Spyder):
import os
import tarfile
import urllib
import pandas as pd
DOWNLOAD_ROOT = "https://raw.githubusercontent.com/ageron/handson-ml2/master/"
HOUSING_PATH = os.path.join("datasets", "housing")
HOUSING_URL = DOWNLOAD_ROOT + "datasets/housing/housing.tgz"
def fetch_housing_data(housing_url = HOUSING_URL, housing_path = HOUSING_PATH):
os.makedirs(housing_path, exist_ok = True)
tgz_path = os.path.join(housing_path, "housing.tgz")
urllib.request.urlretrieve(housing_url, tgz_path)
housing_tgz = tarfile.open(tgz_path)
housing_tgz.extractall(path = housing_path)
housing_tgz.close()
def load_housing_data(housing_path = HOUSING_PATH):
csv_path = os.path.join(housing_path, "housing.csv")
return pd.read_csv(csv_path)
housing = load_housing_data()
print(housing)
Error message in Jupyter:
--------------------------------------------------------------------------- FileNotFoundError Traceback (most recent call last) in 21 return pd.read_csv(csv_path) 22 ---> 23 housing = load_housing_data() 24 housing.head()
in load_housing_data(housing_path) 19 def load_housing_data(housing_path = HOUSING_PATH): 20 csv_path = os.path.join(housing_path, "housing.csv") ---> 21 return pd.read_csv(csv_path) 22 23 housing = load_housing_data()
~\Miniconda3\lib\site-packages\pandas\io\parsers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision) 684 ) 685 --> 686 return _read(filepath_or_buffer, kwds) 687 688
~\Miniconda3\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds) 450 451 # Create the parser. --> 452 parser = TextFileReader(fp_or_buf, **kwds) 453 454 if chunksize or iterator:
~\Miniconda3\lib\site-packages\pandas\io\parsers.py in init(self, f, engine, **kwds) 934 self.options["has_index_names"] = kwds["has_index_names"] 935 --> 936 self._make_engine(self.engine) 937 938 def close(self):
~\Miniconda3\lib\site-packages\pandas\io\parsers.py in _make_engine(self, engine) 1166 def _make_engine(self, engine="c"): 1167 if engine == "c": -> 1168 self._engine = CParserWrapper(self.f, **self.options) 1169 else: 1170 if engine == "python":
~\Miniconda3\lib\site-packages\pandas\io\parsers.py in init(self, src, **kwds) 1996 kwds["usecols"] = self.usecols 1997 -> 1998 self._reader = parsers.TextReader(src, **kwds) 1999 self.unnamed_cols = self._reader.unnamed_cols 2000
pandas_libs\parsers.pyx in pandas._libs.parsers.TextReader.cinit()
pandas_libs\parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()
FileNotFoundError: [Errno 2] No such file or directory: 'datasets\housing\housing.csv'
I would appreciate if someone took the time to reproduce/provide input whether the code returns the error message.
Many thanks!
The local file
"datasets/housing/housing.csv"
is created only when you callYour code sample does not call this function. Try adding this line before
housing = load_housing_data()
.