Script for JSON to CSV for the Weather Underground API, fetching historical data
2013-12-16
Based on https://github.com/PythonJournos/LearningPython/blob/master/tutorials/convert_json_to_csv.py, the script is as follows:
```
import urllib2
import json
import csv

outfile_path = 'history.csv'
writer = csv.writer(open(outfile_path, 'w'))
headers = ['date', 'tempm']  # one header per column written below
writer.writerow(headers)

req = urllib2.Request("http://api.wunderground.com/api/YOUR_KEY/history_20131001/q/India/Coimbatore.json")
opener = urllib2.build_opener()
f = opener.open(req)
data = json.load(f)

# one CSV row per observation in the response
for history in data['history']['observations']:
    row = []
    row.append(str(history['date']['pretty']))
    row.append(str(history['tempm']))
    writer.writerow(row)
```
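One small tidy-up worth noting: the script above never closes the CSV file, and the Python 2 csv module expects the file to be opened in binary mode. A minimal sketch of the same script with those two details handled (same URL, same fields, YOUR_KEY still a placeholder):

```
import urllib2
import json
import csv

url = "http://api.wunderground.com/api/YOUR_KEY/history_20131001/q/India/Coimbatore.json"

# 'wb' because the Python 2 csv module writes its own line endings;
# the with block guarantees the file is flushed and closed
with open('history.csv', 'wb') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['date', 'tempm'])
    data = json.load(urllib2.urlopen(url))
    for history in data['history']['observations']:
        writer.writerow([str(history['date']['pretty']), str(history['tempm'])])
```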
Now the URL has to be iterated over to fetch the required range of historical data, and, most importantly, the date range has to be set.
The date range is constructed from this answer: http://stackoverflow.com/questions/1060279/iterating-through-a-range-of-dates-in-python
```
from datetime import date
from dateutil.rrule import rrule, DAILY

a = date(2009, 5, 30)
b = date(2009, 6, 9)

for dt in rrule(DAILY, dtstart=a, until=b):
    print dt.strftime("%Y-%m-%d")
```
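For what it's worth, the same range can be produced with just the standard library, in case dateutil is not installed; a small sketch:

```
from datetime import date, timedelta

a = date(2009, 5, 30)
b = date(2009, 6, 9)

# walk from a to b inclusive, one day at a time
d = a
while d <= b:
    print d.strftime("%Y-%m-%d")
    d += timedelta(days=1)
```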
The URL iteration is based on this answer: http://stackoverflow.com/questions/16632569/for-loops-in-python-to-read-long-url-from-shortened-url
```
import urllib2
from BeautifulSoup import BeautifulSoup  # carried over from the original answer; not actually used here

for x in ('20131011', '20131012'):
    shortURL = 'http://api.wunderground.com/api/4d09b615cbc7726e/history_' + str(x) + '/q/India/Coimbatore.json'
    output = urllib2.urlopen(shortURL)
    print output.url
```
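Since the date is the only part of the path that changes, the URL can also be built from a format template, which keeps the key and city in one place; a small sketch with the key left as a placeholder:

```
URL_TEMPLATE = ('http://api.wunderground.com/api/YOUR_KEY/'
                'history_{0}/q/India/Coimbatore.json')

for date_str in ('20131011', '20131012'):
    # the date string is substituted into the path for each request
    print URL_TEMPLATE.format(date_str)
```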
Now the problem is how to feed the for-loop date range into the URL loop. I searched for a long time for a way to build a list from the printed

dt.strftime("%Y-%m-%d")

values. Finally got it right with an empty list and append(), hoorah! http://learnpythonthehardway.org/book/ex32.html
```
import urllib2
from datetime import date
from dateutil.rrule import rrule, DAILY

a = date(2009, 6, 3)
b = date(2009, 6, 9)

dtm = []
for dt in rrule(DAILY, dtstart=a, until=b):
    print dt.strftime("%Y%m%d")
    dtm.append(dt.strftime("%Y%m%d"))
print dtm

for x in (dtm):
    shortURL = 'http://api.wunderground.com/api/YOURKEY/history_' + str(x) + '/q/India/Coimbatore.json'
    output = urllib2.urlopen(shortURL)
    print output.url
```

The output looks like:

```
20090603
20090604
...
20090609
['20090603', '20090604', '20090605', '20090606', '20090607', '20090608', '20090609']
http://api.wunderground.com/api/YOURKEY/history_20090603/q/India/Coimbatore.json
...
http://api.wunderground.com/api/YOURKEY/history_20090609/q/India/Coimbatore.json
```
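As an aside, the dtm list can be built in one line with a list comprehension, which is equivalent to the append loop above:

```
from datetime import date
from dateutil.rrule import rrule, DAILY

a = date(2009, 6, 3)
b = date(2009, 6, 9)

# same result as the explicit loop with dtm.append(...)
dtm = [dt.strftime("%Y%m%d") for dt in rrule(DAILY, dtstart=a, until=b)]
print dtm
```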
Now the challenge is how to integrate the above code with the CSV writing code. The solution is another for loop for urllib. The final code is:
```
import urllib2
import json
import csv
from datetime import date
from dateutil.rrule import rrule, DAILY

outfile_path = 'history.csv'
writer = csv.writer(open(outfile_path, 'w'))
# note: the 'Events' column below ends up holding the windchillm value
headers = ['TimeIST','TemperatureC','Dew PointC','Humidity','Wind SpeedKm/h','Gust SpeedKm/h','Wind DirectionDe','Wind Direction','VisibilityKm','Sea Level PressurehPa','Events','Heatindex','Precipitationmm','Conditions']
writer.writerow(headers)

a = date(2013, 8, 1)
b = date(2013, 8, 2)
dtm = []
for dt in rrule(DAILY, dtstart=a, until=b):
    dtm.append(dt.strftime("%Y%m%d"))

for x in (dtm):
    url = 'http://api.wunderground.com/api/4d09b615cbc7726e/history_' + str(x) + '/q/India/Coimbatore.json'
    req = urllib2.Request(url)
    opener = urllib2.build_opener()
    f = opener.open(req)
    data = json.load(f)

# this loop sits outside the URL loop, so only the last response is written (fixed below)
for history in data['history']['observations']:
    row = []
    datewu = history['date']['year'] + '-' + history['date']['mon'] + '-' + history['date']['mday'] + 'T' + history['date']['hour'] + ':' + history['date']['min'] + ':00.000000+0530'
    row.append(str(datewu))
    row.append(str(history['tempm']))
    row.append(str(history['dewptm']))
    row.append(str(history['hum']))
    row.append(str(history['wspdm']))
    row.append(str(history['wgustm']))
    row.append(str(history['wdird']))
    row.append(str(history['wdire']))
    row.append(str(history['vism']))
    row.append(str(history['pressurem']))
    row.append(str(history['windchillm']))
    row.append(str(history['heatindexm']))
    row.append(str(history['precipm']))
    row.append(str(history['conds']))
    writer.writerow(row)
```
The above code does not do the job; it only takes the last date from the date range, because the observation loop sits outside the URL loop, so the code has to change. The final working code:
```
import urllib2
import json
import csv
from datetime import date
from dateutil.rrule import rrule, DAILY

outfile_path = 'history1.csv'
writer = csv.writer(open(outfile_path, 'w'))
headers = ['TimeIST','TemperatureC','Dew PointC','Humidity','Wind SpeedKm/h','Gust SpeedKm/h','Wind DirectionDe','Wind Direction','VisibilityKm','Sea Level PressurehPa','Events','Heatindex','Precipitationmm','Conditions']
writer.writerow(headers)

# build the list of dates
a = date(2013, 8, 5)
b = date(2013, 8, 10)
dtm = []
for dt in rrule(DAILY, dtstart=a, until=b):
    # print dt.strftime("%Y%m%d")
    dtm.append(dt.strftime("%Y%m%d"))

# build the list of URLs, one per date
dtl = []
for x in (dtm):
    shortURL = 'http://api.wunderground.com/api/4d09b615cbc7726e/history_' + str(x) + '/q/India/Coimbatore.json'
    output = urllib2.urlopen(shortURL)
    # print output.url
    dtl.append(output.url)
print dtl

# fetch and parse every URL, collecting the JSON responses
dtd = []
for url in (dtl):
    req = urllib2.Request(url)
    opener = urllib2.build_opener()
    f = opener.open(req)
    data = json.load(f)
    # print data
    dtd.append(data)

# write one CSV row per observation in every response
for d in (dtd):
    for history in d['history']['observations']:
        if d != history:
            row = []
            datewu = history['date']['year'] + '-' + history['date']['mon'] + '-' + history['date']['mday'] + 'T' + history['date']['hour'] + ':' + history['date']['min'] + ':00.000000+0530'
            row.append(str(datewu))
            row.append(str(history['tempm']))
            row.append(str(history['dewptm']))
            row.append(str(history['hum']))
            row.append(str(history['wspdm']))
            row.append(str(history['wgustm']))
            row.append(str(history['wdird']))
            row.append(str(history['wdire']))
            row.append(str(history['vism']))
            row.append(str(history['pressurem']))
            row.append(str(history['windchillm']))
            row.append(str(history['heatindexm']))
            row.append(str(history['precipm']))
            row.append(str(history['conds']))
            writer.writerow(row)
print data
```
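For reference, the same fetch-parse-write can be done in one pass without the intermediate dtl and dtd lists; a condensed sketch with a shortened header row and only the first couple of fields (the remaining fields follow the same pattern, and YOUR_KEY is a placeholder):

```
import urllib2
import json
import csv
from datetime import date
from dateutil.rrule import rrule, DAILY

a = date(2013, 8, 5)
b = date(2013, 8, 10)
dtm = [dt.strftime("%Y%m%d") for dt in rrule(DAILY, dtstart=a, until=b)]

with open('history1.csv', 'wb') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['TimeIST', 'TemperatureC'])  # shortened header row for the sketch
    for x in dtm:
        url = ('http://api.wunderground.com/api/YOUR_KEY/'
               'history_' + x + '/q/India/Coimbatore.json')
        data = json.load(urllib2.urlopen(url))
        for history in data['history']['observations']:
            d = history['date']
            datewu = (d['year'] + '-' + d['mon'] + '-' + d['mday'] + 'T' +
                      d['hour'] + ':' + d['min'] + ':00.000000+0530')
            # the remaining fields (dewptm, hum, wspdm, ...) follow the same pattern
            writer.writerow([datewu, str(history['tempm'])])
```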
Create a CSV from JSON for WU current observations; here is the non-working code from the past 5 hours:
```
import urllib2
import json
import csv
from datetime import date
#from dateutil.rrule import rrule, DAILY

outfile_path = 'CART.DAT'
writer = csv.writer(open(outfile_path, 'w'))
headers = ['urn:ogc:def:parameter:x-istsos:1.0:time:iso8601','urn:ogc:def:parameter:x-istsos:1.0:cbe:aws:dewpont','urn:ogc:def:parameter:x-ists$
writer.writerow(headers)

req = urllib2.Request("http://api.wunderground.com/api/4d09b615cbc7726e/conditions/q/India/Coimbatore.json")
opener = urllib2.build_opener()
f = opener.open(req)
data = json.load(f)

for current_observation in data['current_observation']:
    print ['station_id']
```
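A quick way to see what data['current_observation'] actually holds is to print its keys directly instead of looping over it; a small sketch using the same request:

```
import urllib2
import json

url = "http://api.wunderground.com/api/4d09b615cbc7726e/conditions/q/India/Coimbatore.json"
data = json.load(urllib2.urlopen(url))

# current_observation is a single dict, so a for loop over it only yields key strings;
# printing the keys shows which fields are actually available
print data['current_observation'].keys()
print data['current_observation']['station_id']
```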
One working piece of code for converting JSON into CSV:
```
import csv
import json

x = """[
    {
        "pk": 22,
        "model": "auth.permission",
        "fields": {
            "codename": "add_logentry",
            "name": "Can add log entry",
            "content_type": 8
        }
    },
    {
        "pk": 23,
        "model": "auth.permission",
        "fields": {
            "codename": "change_logentry",
            "name": "Can change log entry",
            "content_type": 8
        }
    },
    {
        "pk": 24,
        "model": "auth.permission",
        "fields": {
            "codename": "delete_logentry",
            "name": "Can delete log entry",
            "content_type": 8
        }
    }
]"""

x = json.loads(x)

f = csv.writer(open("test.csv", "wb+"))

# Write CSV header; if you don't need that, remove this line
f.writerow(["pk", "model", "codename", "name", "content_type"])

for x in x:
    f.writerow([x["pk"],
                x["model"],
                x["fields"]["codename"],
                x["fields"]["name"],
                x["fields"]["content_type"]])
```
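The same conversion reads a little more clearly with csv.DictWriter (Python 2.7+ for writeheader()), which maps column names to values; a sketch over a shortened version of the same sample JSON:

```
import csv
import json

x = """[
    {"pk": 22, "model": "auth.permission",
     "fields": {"codename": "add_logentry",
                "name": "Can add log entry",
                "content_type": 8}}
]"""
records = json.loads(x)

with open("test.csv", "wb") as outfile:
    fieldnames = ["pk", "model", "codename", "name", "content_type"]
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    writer.writeheader()
    for item in records:
        writer.writerow({"pk": item["pk"],
                         "model": item["model"],
                         "codename": item["fields"]["codename"],
                         "name": item["fields"]["name"],
                         "content_type": item["fields"]["content_type"]})
```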
Applying that pattern to the WU current-conditions JSON, I was always getting:
TypeError: string indices must be integers
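The error comes from looping over a dict: iteration yields the key strings, and indexing a string with another string raises exactly this TypeError. A minimal reproduction with illustrative values:

```
d = {'station_id': 'some_station', 'temp_c': 25}

for item in d:            # iterating a dict yields its keys, which are strings
    print item['temp_c']  # TypeError: string indices must be integers
```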
Hrrr! Finally got working code, based on the realization that, with the current-conditions JSON being a single object rather than a list, it is unnecessary and erroneous to call a for loop on it (took 5 hours to realize), as per this answer: http://stackoverflow.com/questions/14784334/python-csv-error-sequence-expected

The next problem was converting the WU date-time format into istSOS's. I followed the last answer here for the conversion: http://stackoverflow.com/questions/13350909/convert-other-time-values-to-datetime-format-in-python but that answer has a mistake in specifying the abbreviated month: it should be %b, not %m as given in the answer (see http://www.lightbird.net/py-by-example/datetime.date-module.html). So the final code for converting the date and writing the CSV-formatted .DAT file is this:
```
import csv
import json
import urllib2
from datetime import datetime

outfile_path = 'CART.DAT'
writer = csv.writer(open(outfile_path, 'w'))
headers = ['urn:ogc:def:parameter:x-istsos:1.0:time:iso8601','urn:ogc:def:parameter:x-istsos:1.0:cbe:aws:dewpont']
writer.writerow(headers)

req = urllib2.Request("http://api.wunderground.com/api/4d09b615cbc7726e/conditions/q/India/Coimbatore.json")
opener = urllib2.build_opener()
f = opener.open(req)
data = json.load(f)

#for data in data:
# strip the '+0530' characters off the end so strptime can parse the rest
dateNF = data['current_observation']['observation_time_rfc822'].strip('+0530')
print dateNF
# re-emit the timestamp in the istSOS ISO 8601 form, re-attaching the +0530 offset
dateITS = datetime.strptime(dateNF, "%a, %d %b %Y %H:%M:%S ").strftime("%Y-%m-%dT%H:%M:%S.000000+0530")
print dateITS

rows = [dateITS, data["current_observation"]["temp_c"]]
writer.writerow(rows)
```
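A quick check of the %b versus %m point above, with an illustrative RFC 822-style date string:

```
from datetime import datetime

s = 'Mon, 16 Dec 2013 10:30:00'
print datetime.strptime(s, '%a, %d %b %Y %H:%M:%S')   # %b matches the abbreviated month name 'Dec'
# datetime.strptime(s, '%a, %d %m %Y %H:%M:%S')       # raises ValueError: %m expects the month as a number such as 12
```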