Start with files


0. Define your own data for testing

X = [[181, 80, 44], [177, 70, 43], [160, 60, 38]]
Y = ['male', 'male', 'female']
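
This toy dataset (each row is [height, weight, shoe size]) is enough to try out a classifier end to end. A minimal sketch, assuming scikit-learn is available (DecisionTreeClassifier is just one possible choice here, not something this post prescribes):

from sklearn.tree import DecisionTreeClassifier

X = [[181, 80, 44], [177, 70, 43], [160, 60, 38]]
Y = ['male', 'male', 'female']

clf = DecisionTreeClassifier()
clf.fit(X, Y)  # train on the hand-defined data
print(clf.predict([[190, 90, 47]]))  # prints a predicted label, e.g. ['male']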

1. Read/Write Local File

1.1 CSV

1.1.1 CSV lib

Example 1

import csv

dates = []
prices = []

def get_data(filename):
    with open(filename, 'r') as csvfile:
        csv_reader = csv.reader(csvfile)
        next(csv_reader)  # skip the header row with the column names
        for row in csv_reader:
            dates.append(int(row[0].split('/')[0]))  # keep the first part of the date
            prices.append(float(row[1]))

get_data('appleStock.csv')  # fill dates and prices from the CSV file

Example 2

import csv

# Use the csv module to read the file, and convert the result to a list of rows
with open("nfl.csv", "r") as f:
    nfl = list(csv.reader(f))
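
The section heading says read/write, so here is the writer side of the csv module as well; a minimal sketch where the filename and rows are made up for illustration:

import csv

rows = [['date', 'price'], ['27/9/2016', 113.09], ['28/9/2016', 113.95]]
with open('out.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(rows)  # write the header row and the data rows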

1.1.2 Pandas

# read the data, shuffle it, sort out the labels, and split into train/test
from sklearn.model_selection import train_test_split  # sklearn.cross_validation is deprecated
# pandas helps us analyse our data
import pandas as pd
# numpy performs the math calculations
import numpy as np

# load the data
telescope = pd.read_csv('MAGIC Gamma Telescope Data.csv')

# clean the data
# Since the file is ordered by class, shuffle the rows to get a better result.
# iloc is pandas's way of selecting rows by their positions in the index,
# and numpy's random.permutation() generates a random sequence of those positions.
telescope_shuffle = telescope.iloc[np.random.permutation(len(telescope))]
# All the rows are now randomly rearranged, so reset the indices with reset_index()
# so they are ordered again even though the data stays shuffled.
tele = telescope_shuffle.reset_index(drop=True)
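
As an aside, pandas can do the same shuffle-and-reset in one line with sample(); this is an equivalent alternative, not what the rest of the snippet uses:

# sample all rows (frac=1) in random order, then reset the index
tele = telescope.sample(frac=1).reset_index(drop=True)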

# Store the 2 classes
# Map each of the two class labels to an integer with map():
# change labels 'g' and 'h' to numeric 0 and 1
tele['Class'] = tele['Class'].map({'g': 0, 'h': 1})
# store the class labels we're going to predict,
# using .values to retrieve them as an array
tele_class = tele['Class'].values

# Split into training and testing data
# train_test_split() creates index arrays for both sets.
# stratify=tele_class keeps the class proportions the same in both splits,
# and train_size/test_size define what percentage of the data goes to each.
training_indices, testing_indices = train_test_split(
    tele.index, stratify=tele_class, train_size=0.75, test_size=0.25)
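
To actually materialize the two sets from those index arrays, something like the following works (tele_train and tele_test are hypothetical names introduced here):

# select rows by the index arrays returned by train_test_split()
tele_train = tele.loc[training_indices]
tele_test = tele.loc[testing_indices]
print(len(tele_train), len(tele_test))  # roughly a 75/25 split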

1.2 TXT

f = open("story.txt", 'r')
story_string = f.read()
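
Writing works the same way with mode 'w'; the output filename is illustrative:

# write a string back out to a text file
with open("story_copy.txt", 'w') as f:
    f.write(story_string)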

1.3 Image

# open an image
import PIL.Image  # PIL (Pillow) helps us load and modify images
import numpy as np

img0 = PIL.Image.open('2.jpg')
# convert it to a float32 numpy array so we can do math on the pixels
img0 = np.float32(img0)
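
To go back the other way after processing, clip the array to the valid pixel range and convert it to uint8 before saving; a sketch assuming Pillow and numpy as above (the output filename is made up):

# convert the float array back to an 8-bit image and save it
img_out = PIL.Image.fromarray(np.uint8(np.clip(img0, 0, 255)))
img_out.save('2_processed.jpg')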

2. Download from URL

# os gives us operating-system-dependent functionality (paths, existence checks)
import os
# urllib downloads the file
import urllib.request
# zipfile unzips the downloaded archive
import zipfile

def main():
    # Step 1 - download Google's pre-trained neural network
    url = 'https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip'
    data_dir = '../data/'
    model_name = os.path.split(url)[-1]
    print(model_name)  # inception5h.zip

    # the data folder sits one level above main.py, at the same level as the python folder
    local_zip_file = os.path.join(data_dir, model_name)
    print(local_zip_file)  # ../data/inception5h.zip

    if not os.path.exists(local_zip_file):
        # Download
        model_url = urllib.request.urlopen(url)
        with open(local_zip_file, 'wb') as output:
            output.write(model_url.read())
        # Extract
        with zipfile.ZipFile(local_zip_file, 'r') as zip_ref:
            zip_ref.extractall(data_dir)

if __name__ == '__main__':
    main()
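
For a one-off download, urllib.request.urlretrieve() does the download-to-file step in a single call; a minimal alternative sketch of the same step:

import os
import urllib.request

url = 'https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip'
local_zip_file = os.path.join('../data/', os.path.split(url)[-1])
if not os.path.exists(local_zip_file):
    urllib.request.urlretrieve(url, local_zip_file)  # download straight to disk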