In [4]:
# The usual stuff:
import pandas as pd
import numpy as np

# For this you have to do "pip install pandasql" or "conda install pandasql"
from pandasql import sqldf
def pysqldf(q):
    """Run the SQL query string ``q`` with pandasql and return its result.

    The notebook's global namespace is passed as the environment, so
    DataFrames defined in earlier cells (e.g. ``df``) can be referenced
    by their variable name inside the query.
    """
    # globals() is evaluated on every call, so DataFrames created after this
    # definition are visible to later queries as well.
    return sqldf(q, globals())

# Build a small, reproducible test DataFrame:
# rows are labelled y0..y9, columns x0..x4, values are uniform random floats.
cols = ['x' + str(i) for i in range(5)]
indexes = ['y' + str(i) for i in range(10)]
np.random.seed(42)  # fixed seed so the table is the same on every run
df = pd.DataFrame(
    np.random.rand(len(indexes), len(cols)),
    index=indexes,
    columns=cols,
)
df
Out[4]:
x0 x1 x2 x3 x4
y0 0.374540 0.950714 0.731994 0.598658 0.156019
y1 0.155995 0.058084 0.866176 0.601115 0.708073
y2 0.020584 0.969910 0.832443 0.212339 0.181825
y3 0.183405 0.304242 0.524756 0.431945 0.291229
y4 0.611853 0.139494 0.292145 0.366362 0.456070
y5 0.785176 0.199674 0.514234 0.592415 0.046450
y6 0.607545 0.170524 0.065052 0.948886 0.965632
y7 0.808397 0.304614 0.097672 0.684233 0.440152
y8 0.122038 0.495177 0.034389 0.909320 0.258780
y9 0.662522 0.311711 0.520068 0.546710 0.184854
In [6]:
# Can now run SELECT queries, referring to the DataFrame by its variable name.
# Note: the returned frame is indexed 0, 1, 2, ... rather than by the
# y0..y9 row labels of df.
pysqldf("SELECT * FROM df LIMIT 3;")
Out[6]:
x0 x1 x2 x3 x4
0 0.374540 0.950714 0.731994 0.598658 0.156019
1 0.155995 0.058084 0.866176 0.601115 0.708073
2 0.020584 0.969910 0.832443 0.212339 0.181825
In [8]:
# Filter with a WHERE clause: return at most three rows whose x0 is below 0.3.
pysqldf("SELECT * FROM df where x0 < .3 LIMIT 3;")
Out[8]:
x0 x1 x2 x3 x4
0 0.155995 0.058084 0.866176 0.601115 0.708073
1 0.020584 0.969910 0.832443 0.212339 0.181825
2 0.183405 0.304242 0.524756 0.431945 0.291229
In [11]:
# Aggregate functions work too: a single result row with the maximum of x0 and x1.
# The result columns are labelled with the expressions themselves: max(x0), max(x1).
pysqldf("select max(x0), max(x1) from df;")
Out[11]:
max(x0) max(x1)
0 0.808397 0.96991
In [14]:
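# Updates are NOT allowed! pysqldf is meant for queries that return rows
# (SELECT); an UPDATE statement will not modify the DataFrame.
# Don't try this:
# pysqldf("update df set x0 = 5")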