Start with files
0. Define your own training/testing data
# Toy training set: one row of three numeric features per sample
# (presumably [height, weight, shoe size] — verify against usage).
X = [
    [181, 80, 44],
    [177, 70, 43],
    [160, 60, 38],
]
# Target label for each corresponding row of X.
Y = ['male', 'male', 'female']
1. Read/Write Local File
1.1 CSV
1.1.1 CSV lib
Example 1
import csv

# Module-level accumulators filled in by get_data(); repeated calls append.
dates = []
prices = []


def get_data(filename):
    """Parse a stock CSV into the module-level ``dates`` and ``prices`` lists.

    Expects a header row followed by rows whose first column looks like a
    date such as 'M/D/YYYY' (only the leading integer before the first '/'
    is kept) and whose second column is a price.

    :param filename: path to the CSV file to read
    :return: None — results accumulate in ``dates`` and ``prices``
    """
    with open(filename, 'r') as csvfile:
        reader = csv.reader(csvfile)
        next(reader)  # skip the column-name header row
        for row in reader:
            dates.append(int(row[0].split('/')[0]))
            prices.append(float(row[1]))


if __name__ == '__main__':
    # Example usage; guarded so importing this module does not try to read
    # a file that may not exist.
    get_data('appleStock.csv')
Example 2
import csv

# Read the whole NFL CSV into a list of row-lists.  The `with` statement
# guarantees the file handle is closed even if parsing raises (the
# original opened the file and never closed it).
with open("nfl.csv", "r") as f:
    nfl = list(csv.reader(f))
1.1.2 Pandas
# read/shuffle(label)/sort/split train/test
# read / shuffle (by label) / split into train and test
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
# helps analyse our data
import pandas as pd
# perform math calculations
import numpy as np

# Load the raw telescope data.
telescope = pd.read_csv('MAGIC Gamma Telescope Data.csv')

# Clean/shuffle: the file is already grouped by class, so shuffle the rows
# for a better split.  np.random.permutation yields a random ordering of
# all row positions, and .iloc reorders the DataFrame by those positions.
telescope_shuffle = telescope.iloc[np.random.permutation(len(telescope))]
# Reset the index so it runs 0..n-1 even though the rows are now shuffled.
tele = telescope_shuffle.reset_index(drop=True)

# Store the two classes: map the labels 'g' and 'h' to integers 0 and 1.
tele['Class'] = tele['Class'].map({'g': 0, 'h': 1})
# The class labels we are going to predict, extracted as a numpy array.
tele_class = tele['Class'].values

# Split the row indices 75% / 25% into training and testing sets,
# stratified on the class labels so both sets keep the class balance.
training_indices, testing_indices = train_test_split(
    tele.index, stratify=tele_class, train_size=0.75, test_size=0.25)
# Preserve the original alias: "validation" was the same as the test split.
validation_indices = testing_indices
1.2 TXT
# Read the whole story into one string.  The `with` statement closes the
# file handle when done (the original left it open).
with open("story.txt", 'r') as f:
    story_string = f.read()
1.3 Image
#open image
# Open the sample image and convert it straight to a float32 numpy array
# so it can be used in numeric computations.
import PIL.Image  # Pillow: image loading and manipulation

img0 = np.float32(PIL.Image.open('2.jpg'))
2. Download from url
# os: operating-system-dependent functionality
import os
#unzip files
import zipfile
def main():
    """Locate Google's pre-trained Inception network archive on disk.

    Derives the archive filename from the download URL, reports whether
    the zip already exists locally, and nothing more — the actual
    download/extract code is preserved below, commented out.
    """
    # Step 1 - download location of Google's pre-trained neural network.
    url = 'https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip'
    data_dir = '../data/'  # only used by the commented-out download code
    model_name = os.path.split(url)[-1]
    print(model_name)  # inception5h.zip
    # local_zip_file = os.path.join(data_dir, model_name)
    # local_zip_file = "./data/inception5h.zip"  # same folder as main.py
    # One folder up from main.py, i.e. at the same level as the python folder.
    local_zip_file = "../data/inception5h.zip"
    print(local_zip_file)  # ../data/inception5h.zip
    print(os.path.exists(local_zip_file))
    # Original checked the same condition twice with two separate ifs;
    # a single if/else is equivalent.
    if not os.path.exists(local_zip_file):
        print("Not exists")
    else:
        print("exists!")
    # sys.exit(0)
    '''
    if not os.path.exists(local_zip_file):
        # Download
        model_url = urllib.request.urlopen(url)
        with open(local_zip_file, 'wb') as output:
            output.write(model_url.read())
        # Extract
        with zipfile.ZipFile(local_zip_file, 'r') as zip_ref:
            zip_ref.extractall(data_dir)
    '''


if __name__ == '__main__':
    main()