#!/usr/bin/env python # Theano tutorial # Solution to Exercise in section 'Using the GPU' # 1. Raw results from __future__ import absolute_import, print_function, division import numpy as np import theano import theano.tensor as tt theano.config.floatX = 'float32' rng = np.random N = 400 feats = 784 D = (rng.randn(N, feats).astype(theano.config.floatX), rng.randint(size=N, low=0, high=2).astype(theano.config.floatX)) training_steps = 10000 # Declare Theano symbolic variables x = theano.shared(D[0], name="x") y = theano.shared(D[1], name="y") w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w") b = theano.shared(np.asarray(0., dtype=theano.config.floatX), name="b") x.tag.test_value = D[0] y.tag.test_value = D[1] #print "Initial model:" #print w.get_value(), b.get_value() # Construct Theano expression graph p_1 = 1 / (1 + tt.exp(-tt.dot(x, w) - b)) # Probability of having a one prediction = p_1 > 0.5 # The prediction that is done: 0 or 1 xent = -y * tt.log(p_1) - (1 - y) * tt.log(1 - p_1) # Cross-entropy cost = tt.cast(xent.mean(), 'float32') + \ 0.01 * (w ** 2).sum() # The cost to optimize gw, gb = tt.grad(cost, [w, b]) # Compile expressions to functions train = theano.function( inputs=[], outputs=[prediction, xent], updates=[(w, w - 0.01 * gw), (b, b - 0.01 * gb)], name="train") predict = theano.function(inputs=[], outputs=prediction, name="predict") if any([n.op.__class__.__name__ in ['Gemv', 'CGemv', 'Gemm', 'CGemm'] for n in train.maker.fgraph.toposort()]): print('Used the cpu') elif any([n.op.__class__.__name__ in ['GpuGemm', 'GpuGemv'] for n in train.maker.fgraph.toposort()]): print('Used the gpu') else: print('ERROR, not able to tell if theano used the cpu or the gpu') print(train.maker.fgraph.toposort()) for i in range(training_steps): pred, err = train() #print "Final model:" #print w.get_value(), b.get_value() print("target values for D") print(D[1]) print("prediction on D") print(predict()) """ # 2. 
Profiling

# 2.1 Profiling for CPU computations

# In your terminal, type:
$ THEANO_FLAGS=profile=True,device=cpu python using_gpu_solution_1.py

# You'll first see the output of the script:
Used the cpu
target values for D
prediction on D

# Followed by the output of profiling. You'll see profiling results for each function
# in the script, followed by a summary for all functions.
# We'll show here only the summary:

Results were produced using an Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz

Function profiling
==================
  Message: Sum of all(2) printed profiles at exit excluding Scan op profile.
  Time in 10001 calls to Function.__call__: 1.300452e+00s
  Time in Function.fn.__call__: 1.215823e+00s (93.492%)
  Time in thunks: 1.157602e+00s (89.015%)
  Total compile time: 8.922548e-01s
    Number of Apply nodes: 17
    Theano Optimizer time: 6.270301e-01s
       Theano validate time: 5.993605e-03s
    Theano Linker time (includes C, CUDA code generation/compiling): 2.949309e-02s
       Import time 3.543139e-03s
  Time in all call to theano.grad() 1.848292e-02s
  Time since theano import 2.864s
Class
---
<% time>