Monday 13 January 2014

Word frequencies

##################################################
#  Looking briefly at word frequencies
#
#
#  JHPS
#  C:  13/01/2014
#
##################################################

import os,string
from collections import Counter
from pylab import *

#### Overall word usage

## Helper: turn one line of text into clean, lower-case words

def tokenize_line(line, drop_chars):
    """Return the lower-cased words found in ``line``.

    The line is split on any run of whitespace, every character contained in
    ``drop_chars`` is removed from each token, and tokens that end up empty
    are discarded.  Blank lines, doubled spaces and stand-alone punctuation
    therefore contribute no words.

    line       -- one line of text (a trailing newline is fine)
    drop_chars -- container of single characters to strip out of each token
    """
    cleaned = (''.join(ch for ch in token if ch not in drop_chars)
               for token in line.lower().split())
    return [word for word in cleaned if word]


## Read every .txt file of the working directory into a dict
## (file name -> list of lines)

local_files = os.listdir('.')
posts = {}

for file_name in local_files:
    # endswith() rather than 'in': a name such as 'notes.txt.bak' is not a post
    if file_name.endswith('.txt'):
        # the with-block closes the handle as soon as the lines are read
        with open(file_name, 'r') as handle:
            posts[file_name] = handle.readlines()

## pull all the words out

exclude = set(string.punctuation)
words = []

for post_lines in posts.values():
    for line in post_lines:
        words += tokenize_line(line, exclude)

## Total word count is:
print(len(words))

## Counter word frequencies
print(Counter(words))

###  Looking at the top keywords over time
## Helper: word frequencies of a single post

def count_post_words(lines, drop_chars):
    """Return a Counter of the words contained in ``lines``.

    Each line is lower-cased and split on whitespace; characters listed in
    ``drop_chars`` are removed from every token and empty tokens are ignored.

    lines      -- iterable of text lines belonging to one post
    drop_chars -- container of single characters to strip out of each token
    """
    cleaned = (''.join(ch for ch in token if ch not in drop_chars)
               for line in lines
               for token in line.lower().split())
    return Counter(word for word in cleaned if word)


## Read every .txt file of the working directory into a dict
## (file name -> list of lines)

local_files = os.listdir('.')
posts = {}

for file_name in local_files:
    # endswith() rather than 'in': a name such as 'notes.txt.bak' is not a post
    if file_name.endswith('.txt'):
        # the with-block closes the handle as soon as the lines are read
        with open(file_name, 'r') as handle:
            posts[file_name] = handle.readlines()

## key words
## Each list receives one entry per post, in the order the posts are visited.
data = []
technology = []
information = []
environment = []
system = []
internet = []
example = []
things = []
post_cnt = []
time = []
computational = []

# os.listdir() and dict iteration give no guaranteed order, so visit the posts
# by sorted file name to make the series repeatable.
# NOTE(review): assumes the file names sort in posting order -- confirm.
for post_key in sorted(posts):
    # 'exclude' is the punctuation set built in the overall word usage section
    counted = count_post_words(posts[post_key], exclude)

    data.append(counted['data'])
    technology.append(counted['technology'])
    information.append(counted['information'])
    # singular/plural and adjective forms are pooled for these two keywords
    environment.append(counted['environment'] + counted['environmental'])
    system.append(counted['system'] + counted['systems'])
    internet.append(counted['internet'])
    example.append(counted['example'])
    things.append(counted['things'])
    post_cnt.append(counted['post'])
    time.append(counted['time'])
    computational.append(counted['computational'])

## Plotting these
## Two side-by-side panels sharing the y axis, one line per keyword.

fig = plt.figure(figsize=(15, 7))

# One tick per post, labelled 1..N; N comes from the data rather than being
# fixed at 13, so the axis stays correct when posts are added or removed.
n_posts = len(data)
tick_positions = list(range(n_posts))
tick_labels = list(range(1, n_posts + 1))

left_series = [
    (data, 'Data'),
    (technology, 'Technology'),
    (information, 'Information'),
    (environment, 'Environment/Environmental'),
    (system, 'System'),
    (internet, 'Internet'),
]

right_series = [
    (example, 'Example'),
    (things, 'Things'),
    (post_cnt, 'Post'),
    (time, 'Time'),
    (computational, 'Computational'),
]

# Left panel
ax1 = fig.add_subplot(1, 2, 1)
for series, series_label in left_series:
    ax1.plot(series, label=series_label)

ax1.set_xticks(tick_positions)
ax1.set_xticklabels(tick_labels)
ax1.set_xlabel('Posts')
ax1.set_ylabel('Word frequency')

# bbox_transform=fig.transFigure makes the anchor a figure coordinate
ax1.legend(bbox_to_anchor=(0.4, 0, 0, 0), bbox_transform=fig.transFigure)

# Right panel, same y scale as the left one
ax2 = fig.add_subplot(1, 2, 2, sharey=ax1)
for series, series_label in right_series:
    ax2.plot(series, label=series_label)

ax2.set_xticks(tick_positions)
ax2.set_xticklabels(tick_labels)
ax2.set_xlabel('Posts')

ax2.legend(bbox_to_anchor=(0.8, 0, 0, 0), bbox_transform=fig.transFigure)

plt.show()

Wednesday 27 November 2013

Map drawing

#########################################################
#  Plotting points on Lambert Conformal projection with blue marble image
#
#
#  JHPS
#  C: 26/11/2013

#######################################################

from mpl_toolkits.basemap import Basemap
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


#Points
# stops.csv is a plain csv file (e.g. exported from excel) with the columns
# 'NAME', 'LON' and 'LAT'.
stops = pd.read_csv('stops.csv')

# num picks the element of the stops list to plot (starting from 0);
# the map is centred on its longitude and the marker is drawn at its position.
num = 0
lon_c = stops['LON'][num]
lat_c = stops['LAT'][num]

#Setup
fig = plt.figure()
ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])

# Alternative view (orthographic globe), kept for reference:
#map = Basemap(resolution='c',projection='ortho',lat_0=60.,lon_0=-60.)

# Lambert conformal map spanning 60 degrees of longitude either side of the
# chosen stop and latitudes 30 to 70.
# NOTE: the name 'map' hides the builtin map() for the rest of the script.
map = Basemap(llcrnrlon=lon_c - 60, llcrnrlat=30,
              urcrnrlon=lon_c + 60, urcrnrlat=70.000,
              rsphere=(6378137.00, 6356752.3142),
              resolution='l', area_thresh=1000., projection='lcc',
              lat_1=10., lon_0=lon_c, ax=ax)

# Unlabelled background graticule
map.drawparallels(np.arange(-90., 91., 30.))
map.drawmeridians(np.arange(-180., 181., 60.))

# Labelled graticule on top of it
parallels = np.arange(0., 80, 20.)
map.drawparallels(parallels, labels=[1, 0, 0, 1])
meridians = np.arange(10., 360., 30.)
map.drawmeridians(meridians, labels=[1, 0, 0, 1])

map.bluemarble()

map.drawcountries(linewidth=1)

# Calling the Basemap instance converts lon/lat to map coordinates.
# Use the stop selected by num, not a fixed row of the table.
x, y = map(lon_c, lat_c)

map.plot(x, y, 'r.', markersize=20)

plt.show()



Tuesday 22 October 2013

Data assimilation: Interpolation example

########################################################
# Demonstration of interpolation for Hercule's cyborg
#  prints three plots for different input data and resulting
#  interpolations
#
#
# JHPS
# C: 22/10/2013
#######################################################

import numpy as np
from scipy.interpolate import Rbf
import matplotlib.pyplot as plt
from matplotlib import cm

### Helpers shared by the three walks

def interpolate_on_grid(x, y, z, limits=(-2.0, 2.0), steps=100, epsilon=2):
    """Fit a radial basis function through the samples and grid it.

    x, y    -- 1-D arrays with the sample coordinates
    z       -- 1-D array with the value observed at each sample
    limits  -- (lower, upper) bounds used for both grid axes
    steps   -- number of grid nodes along each axis
    epsilon -- passed straight through to scipy's Rbf

    Returns the tuple (XI, YI, ZI): the two meshgrid coordinate arrays and
    the interpolated values at every grid node.
    """
    ti = np.linspace(limits[0], limits[1], steps)
    XI, YI = np.meshgrid(ti, ti)
    rbf = Rbf(x, y, z, epsilon=epsilon)
    return XI, YI, rbf(XI, YI)


def plot_walk(x, y, z, annotations):
    """Open a new figure showing the interpolated field for one walk.

    The field is drawn with pcolor, the sample points are overlaid coloured
    by their value, and the point D at the origin is marked with an empty
    circle.  Colour limits are left to matplotlib's autoscaling.

    x, y, z     -- sample coordinates and values (see interpolate_on_grid)
    annotations -- list of (text, (px, py), (dx, dy)) tuples: label text,
                   annotated data point and text offset in points.  The 'D'
                   annotation at the origin is always added last.
    """
    XI, YI, ZI = interpolate_on_grid(x, y, z)

    plt.figure()
    plt.subplot(1, 1, 1)
    plt.pcolor(XI, YI, ZI, cmap=cm.jet)
    plt.scatter(np.array([0]), np.array([0]), s=100, facecolors='none', edgecolors='k')
    plt.scatter(x, y, 100, z, cmap=cm.jet)
    plt.plot(np.array([0]), np.array([0]))
    plt.xlim(-2, 2)
    plt.ylim(-2, 2)
    plt.colorbar()

    for text, point, offset in list(annotations) + [('D', (0, 0), (-20, 10))]:
        plt.annotate(text, xy=point, xycoords='data',
                     xytext=offset, textcoords='offset points',
                     arrowprops=dict(arrowstyle="->"))


def run_interpolation_demo():
    """Draw the three figures of the demonstration and show them."""
    ### After first walk with points A, B and C
    x = np.array([-1, 1.5, -1])
    y = np.array([1, 0, -1])
    z = np.array([5, 0, -5])
    plot_walk(x, y, z,
              [(name, (px, py), (-20, 10))
               for name, px, py in zip(['A', 'B', 'C'], x, y)])

    # Second walk with another point between A and B
    x = np.array([-1, 1.5, -1, 0.5])
    y = np.array([1, 0, -1, 1.5])
    z = np.array([5, 0, -5, 3])
    plot_walk(x, y, z, [('New point', (x[-1], y[-1]), (20, -10))])

    # Third walk with another point before A
    x = np.array([-1.5, -1, 1.5, -1])
    y = np.array([1.5, 1, 0, -1])
    z = np.array([-5, 5, 0, -5])
    plot_walk(x, y, z, [('a different new point', (x[0], y[0]), (20, 0))])

    ## show plots
    plt.show()


if __name__ == "__main__":
    run_interpolation_demo()