Python
Some python notes.
Package notes
Debug
from IPython.core.debugger import set_trace
- Put
set_trace()
near the bug.
n[ext]
s[tep]
c[ontinue]
l[ist]
p[rint]
,pp
pp locals()
,pp globals()
a[rgs]
- Put
Jupyter Notebook
Links
- Welcome to Python cheatsheet!
- Running a notebook server
- Installing jupyter_contrib_nbextensions
- jupyter-vim-binding
- line number
- Kernels for different environments
Jupyter kernels
# List kernels
jupyter kernelspec list
# Add python kernel to jupyter
# Name is like an id. This command can also be used to change the display name of an existing kernel.
/path/to/kernel/env/bin/python -m ipykernel install --prefix=/path/to/jupyter/env --name 'python-my-env' --display-name 'Python x - Display name'
# Remove kernels (Or just remove the whole directory listed with the command above)
jupyter kernelspec remove <jupyter-kernel-name>
Tricks
!pwd
, executes a shell command
np.dot??
Auto reload external python modules
IPython extension to reload modules before executing user code.
%load_ext autoreload
%autoreload 2
Quick notes
enumerate
colors = ['red', 'green', 'blue', 'yellow']
for i, color in enumerate(colors):
print i, '-->', color
range
xrange
zip
izip
zip
x = [1, 2, 3]
y = [4, 5, 6]
zipped = zip(x, y)
zipped
# [(1, 4), (2, 5), (3, 6)]
x2, y2 = zip(*zipped)
x == list(x2) and y == list(y2)
# True
self(x)
def __call__(self, x):
pass
requirements.txt
pip freeze > requirements.txt
pip install -r requirements.txt
hashable
hashable: object.__hash__()
, the keys of a dict must be hashable
Numpy tips
- tips
np.linalg.norm(X, ord=np.inf, axis=1)
np.power(X, 2)
np.matmul(A, B)
, matrix multiplication
np.multiply
np.array(np.array_split(<array>))
np.concatenate()
np.argsort(X)[:k]
, indices of the k smallest values
np.bincount(X)
,cnt[X[i]]+=1
np.argmax()
np.hstack((a, b))
, (2, 3) + (2, 1) => (2, 4)
np.stack((a, b), axis=0)
, (2, 3) + (2, 3) => (2, 2, 3)
np.vstack((a, b))
np.dot()
- 2-D array, matrix multiplication
- 1-D arrays, inner product
- http://www.scipy-lectures.org/intro/numpy/operations.html
np.allclose()
?np.any()
,np.all()
np.unique()
, like drop_duplicates()
in pandas
np.cumsum()
- Random Walks
- \(d^2=n\)
- Random Walks
distance = np.abs(a - a[:, np.newaxis])
np.ogrid()
,np.mgrid()
np.ravel(a, order='C')
, flattens the array
C
, C-like order, [0, 0] => [0, 1] => [0, 2] => [1, 0] => …
F
, Fortran-like order, [0, 0] => [1, 0] => [2, 0] => [0, 1] => …
- For multi-dimensional arrays,
C
means the first index changes slowest
- returns a view when possible; modifying the view also changes the original data
np.reshape(a, newshape, order='C')
np.transpose(a, axes=None)
- default, a.T
np.transpose(a, axes=[1,0,2])
, a[1,2,3] == np.transpose(a, axes=[1,0,2])[2,1,3] is True
- transpose doesn’t change the plane, or cube or hyper-shape
- return a view
np.resize()
- if fill, fill required number of elements, repeated in the order
np.resize([0,1,2], 5)
=> [0,1,2,0,1]
- if fill, fill required number of elements, repeated in the order
np.sort(a, axis=1)
- sorts along the given axis: the elements compared are those whose index varies along that axis
np.argsort()
np.lookfor()
*
np.arange
: step of numbers
np.linspace
: count of numbers
Tricks
y, x = x, y
result = 1 < n < 10
x = (classA if y == 1 else classB)(param1, param2)
_
, equivalent to ‘ANS’ on a calculator (the last result in the interactive interpreter)
pdb.set_trace()
, insert a debug point
python -m http.server
(python3), start a file server in the current path
python -m SimpleHTTPServer
(python2)
[1, 2, 3][::-1]
=>[3, 2, 1]
, reversed order
- Unpack
testDict = {'x': 1, 'y': 2, 'z': 3}
func(*testDict)
=>func('x', 'y', 'z')
func(**testDict)
=>func(x=1, y=2, z=3)
reduce(func, sequence[, initial])
functools.reduce()
list(itertools.chain.from_iterable(a))
[[1, 2], [3, 4], [5, 6]]
=>[1, 2, 3, 4, 5, 6]
- nested list of tuples
list(more_itertools.collapse(test))
- bitwise operators
<<
,>>
&
, and
|
, or
~
, not
^
, exclusive or
max(set(test), key=test.count)
, most frequent value
sys.getrecursionlimit()
sys.getsizeof(x)
, object size
dict(zip(keys, values))
, two arrays to a dict
Multiply / Add all items in an array
np.prod([2, 3, 4])
=>24
np.sum([2, 3, 4])
=>9
Binary representation
bin(True) # '0b1'
bin(False) # '0b0'
bin(~True) # '-0b10'
bin(~False) # '-0b1'
Identity of object
id(<object>)
Really? WTF
https://github.com/satwikkansal/wtfpython
Python 3 awesome features
10 awesome features of Python that you can’t use because you refuse to upgrade to Python 3
Unpacking
>>> a, *rest, b = range(10)
>>> a
0
>>> b
9
>>> rest
[1, 2, 3, 4, 5, 6, 7, 8]
Keyword only arguments
Avoid passing too many arguments to the function.
def sum(a, b, *, biteme=False):
if biteme:
shutil.rmtree('/')
else:
return a + b
>>> sum(1, 2, 3)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: sum() takes 2 positional arguments but 3 were given
Scopes
nonlocal
, global
def scope_test():
def do_local():
spam = "local spam"
def do_nonlocal():
nonlocal spam
spam = "nonlocal spam"
def do_global():
global spam
spam = "global spam"
spam = "test spam"
do_local()
print("After local assignment:", spam)
do_nonlocal()
print("After nonlocal assignment:", spam)
do_global()
print("After global assignment:", spam)
scope_test()
print("In global scope:", spam)
Results:
After local assignment: test spam
After nonlocal assignment: nonlocal spam
After global assignment: nonlocal spam
In global scope: global spam
Class
- https://docs.python.org/3/tutorial/classes.html#class-objects
- Improve Your Python: Python Classes and Object Oriented Programming
Basic Class
class Customer(object):
"""A customer of ABC Bank with a checking account. Customers have the
following properties:
Attributes:
name: A string representing the customer's name.
balance: A float tracking the current balance of the customer's account.
"""
def __init__(self, name, balance=0.0):
"""Return a Customer object whose name is *name* and starting
balance is *balance*."""
self.name = name
self.balance = balance
def withdraw(self, amount):
"""Return the balance remaining after withdrawing *amount*
dollars."""
if amount > self.balance:
raise RuntimeError('Amount greater than available balance.')
self.balance -= amount
return self.balance
def deposit(self, amount):
"""Return the balance remaining after depositing *amount*
dollars."""
self.balance += amount
return self.balance
Static variables and methods
class Car(object):
# Static variable
wheels = 4
def __init__(self, make, model):
self.make = make
self.model = model
# Static method
@staticmethod
def make_car_sound():
print('VRooooommmm!')
Abstract Base Class
from abc import ABCMeta, abstractmethod
class Vehicle(object):
"""A vehicle for sale by Jeffco Car Dealership.
Attributes:
wheels: An integer representing the number of wheels the vehicle has.
miles: The integral number of miles driven on the vehicle.
make: The make of the vehicle as a string.
model: The model of the vehicle as a string.
year: The integral year the vehicle was built.
sold_on: The date the vehicle was sold.
"""
__metaclass__ = ABCMeta
base_sale_price = 0
def sale_price(self):
"""Return the sale price for this vehicle as a float amount."""
if self.sold_on is not None:
return 0.0 # Already sold
return 5000.0 * self.wheels
def purchase_price(self):
"""Return the price for which we would pay to purchase the vehicle."""
if self.sold_on is None:
return 0.0 # Not yet sold
return self.base_sale_price - (.10 * self.miles)
@abstractmethod
def vehicle_type(self):
"""Return a string representing the type of vehicle this is."""
pass
Tips
- Try not to introduce a new attribute outside of the __init__ method
Line break
('hello '
'world')
=> 'hello world'
Use it in assert
: How to format a python assert statement that complies with PEP8?
def afunc(some_param_name):
assert isinstance(some_param_name, SomeClassName), (
'some_param_name must be an instance of SomeClassName, '
'silly goose!')
Functional programming
Take keras.layers.merge.Concatenate
as example.
def Concatenate(axis=-1):
def afunc(x):
return concatenate(x, axis=axis)
return afunc
So Concatenate(axis=-1)([a, b])
is equal with concatenate([a, b], axis=-1)
Just guess, haven’t checked the source code.
switch in Python
Replacements for switch statement in Python?
def f(x):
return {
'a': 1,
'b': 2
}.get(x, 9) # 9 is default if x not found
Relative import?
In jupyter notebook, if you want to import A.m module in A.B.n, try this.
import sys
sys.path.append('..')
import m
Concatenate empty array in numpy
Sometimes you want to do concat
from scratch.
arr = np.array([]).reshape([0, 3])
for i in range(3):
arr = np.vstack([arr, np.random.rand(1, 3)])
print arr.shape
# (3, 3)
Path
1. Get common used path
import os
# current path
os.getcwd()
os.path.abspath('.')
# parent path
os.path.dirname(os.getcwd())
2. Iterate files
os.listdir
os.path.join
os.path.abspath
os.path.isdir
os.path.isfile
import os
root = '../..'
for f in os.listdir(root):
path = os.path.join(root, f)
full = os.path.abspath(path)
print full
if os.path.isdir(full):
print 'Is Directory \n'
elif os.path.isfile(full):
print 'Is File \n'
3. Get path in module
# /code/path.py
import os
print os.path.abspath(__file__)
# Output: /code/path.py
trim
' silver '.strip()
# 'silver'
' silver '.lstrip()
# 'silver '
' silver '.rstrip()
# ' silver'
Count list
Method 1. Use Counter
from collections import Counter
L = ['apple','red','apple','red','red','pear']
count = Counter(L)
Method 2. Use defaultdict (fastest), 3 times faster than Counter
from collections import defaultdict
count = defaultdict(int)
for i in L:
count[i] += 1
Set
a = {1, 2, 3}
b = {3, 4, 5}
print '%15s:'%('intersection'), a & b
print '%15s:'%('exclusive or'), a ^ b
print '%15s:'%('union'), a | b
print '%15s:'%('difference'), a - b
# intersection: set([3])
# exclusive or: set([1, 2, 4, 5])
# union: set([1, 2, 3, 4, 5])
# difference: set([1, 2])
fast sorted
When sorting a unique list, sorted(set(l))
is faster than sorted(l)
.
CountVectorizer
a = ['1', '1', '1 3 4', '1 3', '1 2 3']
cv = CountVectorizer()
cv.fit_transform(a)
# ValueError: empty vocabulary; perhaps the documents only contain stop words
cv = CountVectorizer(token_pattern='(?u)\\b\\w+\\b')
cv.fit_transform(a)
Cartesian product
product
from itertools import product
x = ['a', 'b', 'c']
y = list(product(x, x)) # Cartesian product
for item in zip(x, x):
y.remove(item)
print y
# [('a', 'b'), ('a', 'c'), ('b', 'a'), ('b', 'c'), ('c', 'a'), ('c', 'b')]
combinations
from itertools import combinations
x = ['a', 'b', 'c']
print list(combinations(x, 2))
# [('a', 'b'), ('a', 'c'), ('b', 'c')]
Time run
from datetime import datetime
start_time = datetime.now()
# INSERT YOUR CODE
time_elapsed = datetime.now() - start_time
print('Time elapsed %s' % (time_elapsed))
Python package searching order
The default order is:
'<current path>' # The directory of the current script if available, else empty as `''`
# Not available e.g. if the interpreter is invoked interactively
# or if the script is read from standard input
[...] # <path> in $PYTHONPATH
'<entry path>' # The directory where you startup `python`, `ipython`.
# More precisely, the `entry` of the program.
[...] # <path> installation-dependent default
This is exactly the order of sys.path
.