Python: Script to pull text data
$begingroup$
This is built with Python 2.7.15.
The goal of this script is to count the number of words spoken by each Senator on the floor of Congress between given dates.
It pulls from the Congressional Record, cuts out the sections not spoken on the floor (e.g., lists of amendments), and returns a count of words by senator. The current list of senators is from the 115th Congress (2017–2018).
# -*- coding: utf-8 -*-
from requests import get # to make GET request
import time
from datetime import date, timedelta
import os.path
import textract
import re
import csv
import sys
#download URLs
def downloadPDF(url, file_name,
                save_path='/Users/One/Document/Workspace/Projects/Senate/Congressional_Record/'):
    """Fetch ``url`` and save the response as ``file_name`` if it is a PDF.

    url       -- address requested with a single GET.
    file_name -- name of the file created inside ``save_path``.
    save_path -- target directory; defaults to the folder the script always used.

    Returns 'SUCCESS' when a PDF was written, otherwise None (nothing is saved).
    """
    response = get(url)
    # The header may be absent or carry parameters such as
    # "application/pdf; charset=binary", so compare only the media type.
    content_type = response.headers.get('content-type') or ''
    if content_type.split(';')[0].strip().lower() != 'application/pdf':
        return None
    path = os.path.join(save_path, file_name)
    # binary mode: the payload is raw PDF bytes
    with open(path, "wb") as pdf_file:
        pdf_file.write(response.content)
    return 'SUCCESS'
#download congressional records and extract text
def downloadConRecords(startDate, endDate, delay=20):  # dates as YYYYMMDD
    """Download the Senate PDF for each day in a date range and extract its text.

    startDate, endDate -- first and last day (inclusive), as 'YYYYMMDD'
                          strings or ints.
    delay              -- seconds to sleep before every request (default 20).

    Returns a list of ``[url, 'YYYYMMDD', extractedText]`` entries, one per
    day for which downloadPDF reported 'SUCCESS'. An end date earlier than
    the start date yields an empty list.
    """
    # Same folder downloadPDF writes to by default.
    recordDir = '/Users/One/Document/Workspace/Projects/Senate/Congressional_Record/'
    startDate = str(startDate)
    endDate = str(endDate)
    firstDay = date(int(startDate[0:4]), int(startDate[4:6]), int(startDate[6:8]))
    lastDay = date(int(endDate[0:4]), int(endDate[4:6]), int(endDate[6:8]))

    congressionalRecords = []
    # The builtin range is used here; the old local named "range" shadowed it.
    for offset in range((lastDay - firstDay).days + 1):
        iso = (firstDay + timedelta(days=offset)).isoformat()  # 'YYYY-MM-DD'
        YYYY, MM, DD = iso.split('-')
        url = ('https://www.congress.gov/crec/' + YYYY + '/' + MM + '/' + DD +
               '/CREC-' + iso + '-senate.pdf')
        filename = YYYY + MM + DD + ".pdf"
        # pause before each request, including the first one
        time.sleep(delay)
        if downloadPDF(url, filename) == "SUCCESS":
            print(url)
            textExtract = textract.process(recordDir + filename)
            # entry: source url, date stamp (filename without ".pdf"), raw text
            congressionalRecords.append([url, YYYY + MM + DD, textExtract])
    return congressionalRecords
#process the congressional records
def cleanRecords(congressionalRecords, phraseDelete, segmentStart, segmentEnd):
    """Strip newlines, boilerplate phrases and unwanted sections from each record.

    congressionalRecords -- list of records whose element [2] is the raw text.
    phraseDelete         -- substrings removed wherever they occur.
    segmentStart         -- openings of sections to cut out.
    segmentEnd           -- text that closes such a section.

    Start and end strings are interpolated into a regular expression as-is
    (not escaped). Each record is extended in place with the cleaned text and
    a list holding, per segment start, the passages that were cut. The same
    list object is returned.
    """
    # Build each "start ... end" pattern once instead of twice per record.
    patterns = [re.compile('{}.*?{}'.format(start, segmentEnd))
                for start in segmentStart]
    for record in congressionalRecords:
        raw = record[2]
        # On Python 3 the extract may arrive as bytes; on Python 2 bytes is str
        # and this branch is skipped.
        # NOTE(review): assumes the extract is UTF-8 encoded - confirm.
        if isinstance(raw, bytes) and not isinstance(raw, str):
            raw = raw.decode('utf-8')
        clean = raw.replace('\n', ' ')
        for phrase in phraseDelete:
            clean = clean.replace(phrase, '')
        dump = []
        for pattern in patterns:
            # keep what is removed so it can be inspected later
            dump.append(pattern.findall(clean))
            clean = pattern.sub(' ', clean)
        record.append(clean)
        record.append(dump)
    return congressionalRecords
#split the text into sections based on the speaker
def multiDelimStringSplitter(aString, separators):
    """Split ``aString`` into speeches, one per occurrence of a separator.

    aString    -- text to split.
    separators -- list of speaker labels used as delimiters; it is not modified.

    Returns a list of ``[separator, text, wordCount]`` rows, where ``text`` is
    everything up to the next separator and ``wordCount`` is the number of
    whitespace-separated words left after punctuation is removed, as a string.
    Text before the first separator is discarded, and a separator followed by
    no text at all produces no row.
    """
    # Longest first, so a label that is a prefix of another cannot win.
    # Empty strings are ignored; they can never delimit anything.
    ordered = sorted((s for s in separators if s), key=len, reverse=True)
    if not ordered:
        return []
    pattern = re.compile('(' + '|'.join(re.escape(s) for s in ordered) + ')')
    # The capturing group keeps the separators in the result:
    # [preamble, sep, text, sep, text, ...] - always an odd number of pieces.
    pieces = pattern.split(aString)
    rows = []
    for k in range(1, len(pieces) - 1, 2):
        speaker = pieces[k]
        text = pieces[k + 1]
        if not text:
            continue
        # strip punctuation so that it is not counted as part of a word
        stripped = re.sub(r'[^\w\s]', '', text)
        rows.append([speaker, text, str(len(stripped.split()))])
    return rows
#get information on the records, speakers and word counts
def writeAnalytics(congressionalRecords, senators):
    """Build the five export tables from the cleaned records.

    congressionalRecords -- records of the form
                            [url, date, rawText, cleanText, dump].
    senators             -- speaker labels used to split the clean text.

    Each record is extended in place with its list of speeches and with a flat
    ``[speaker, count, speaker, count, ...]`` list of that day's word counts.

    Returns ``(textExport, textDump, speechExport, speakerExport,
    speakerAnalytics)``; every table is a list of rows whose first row is the
    header. With no records, the date columns of speakerAnalytics are ''.
    Raises ValueError if a speech is attributed to a label not in ``senators``.
    """
    textExport = [['url', 'date', 'extractRaw', 'extractClean']]
    textDump = [['date', 'extractDump']]
    speechExport = [['date', 'speaker', 'text', 'wordCount']]
    speakerExport = [['date', 'speaker', 'wordCount']]
    speakerAnalytics = [['startDate', 'endDate', 'speaker', 'wordCount']]

    # running totals over the whole date range
    perSpeakerTotal = dict.fromkeys(senators, 0)
    recordDates = []

    for record in congressionalRecords:
        url, day, rawText, cleanText, dump = (
            record[0], record[1], record[2], record[3], record[4])

        # split the clean extract into one row per speech
        speeches = multiDelimStringSplitter(cleanText, senators)
        record.append(speeches)

        # export only the segment dumps that actually removed something
        for erased in dump:
            if len(erased) > 0:
                textDump.append([day, erased])

        # word counts for this day only
        wordsPerSpeaker = dict.fromkeys(senators, 0)
        for speech in speeches:
            speaker, text, wordCount = speech[0], speech[1], speech[2]
            speechExport.append([day, speaker, text, wordCount])
            if speaker not in wordsPerSpeaker:
                raise ValueError('%r is not in the list of speakers' % (speaker,))
            wordsPerSpeaker[speaker] += int(wordCount)

        # keep the flat per-day list on the record, as before
        flatCounts = []
        for name in senators:
            flatCounts += [name, wordsPerSpeaker[name]]
        record.append(flatCounts)

        textExport.append([url, day, rawText, cleanText])
        recordDates.append(day)

        for name in senators:
            speakerExport.append([day, name, wordsPerSpeaker[name]])
            perSpeakerTotal[name] += wordsPerSpeaker[name]

    # min()/max() fail on an empty list, e.g. when nothing was downloaded
    firstDate = min(recordDates) if recordDates else ''
    lastDate = max(recordDates) if recordDates else ''
    for name in senators:
        speakerAnalytics.append([firstDate, lastDate, name, perSpeakerTotal[name]])

    return textExport, textDump, speechExport, speakerExport, speakerAnalytics
#export to CSV
def exportCSV(flatList, filename):
    """Write the rows of ``flatList`` to ``filename`` + '.csv'.

    flatList -- iterable of rows, each row an iterable of cell values.
    filename -- output path without the '.csv' extension.
    """
    path = filename + ".csv"
    # csv.writer needs a binary file on Python 2 but a text file opened with
    # newline='' on Python 3.
    if sys.version_info[0] < 3:
        handle = open(path, "wb")
    else:
        handle = open(path, "w", newline="", encoding="utf-8")
    with handle as f:
        writer = csv.writer(f)
        writer.writerows(flatList)
# Text removed verbatim from every extract before it is split into speeches.
phraseDelete = [
    ",",
    "This ‘‘bullet’’ symbol identifies statements or insertions which are not spoken by a Member of the Senate on the floor.",
]

# Headings that open sections to be cut out of the extract. Each heading is
# matched with one space on either side, so the padding is added here once.
segmentStart = [" " + heading + " " for heading in (
    "PRAYER",
    "PLEDGE OF ALLEGIANCE",
    "APPOINTMENT OF ACTING PRESIDENT PRO TEMPORE",
    "CERTIFICATES OF ELECTION",
    "MESSAGE FROM THE HOUSE",
    "MESSAGES FROM THE PRESIDENT",
    "EXECUTIVE MESSAGES REFERRED",
    "LIST OF SENATORS BY STATES",
    "PRESIDENTIAL MESSAGES",
    "ENROLLED BILLS PRESENTED",
    "MEASURES REFERRED",
    "EXECUTIVE AND OTHER COMMUNICATIONS",
    "REPORTS OF COMMITTEES",
    "EXECUTIVE REPORTS OF COMMITTEES",
    "AMENDMENTS SUBMITTED AND PROPOSED",
    "INTRODUCTION OF BILLS AND JOINT RESOLUTIONS",
    "ADDITIONAL COSPONSORS",
    "SUBMITTED RESOLUTIONS",
    "SUBMISSION OF CONCURRENT AND SENATE RESOLUTIONS",
    "SENATE RESOLUTION",
    "TEXT OF AMENDMENTS",
    "APPOINTMENT",
    "AUTHORITY FOR COMMITTEES TO MEET",
    "CONFIRMATION",
    "NOMINATION",
)]

# Text that closes a removable section.
# NOTE(review): a backslash may have been lost here (e.g. ' \f ') - confirm
# against the original script.
segmentEnd = " f "
# Speaker labels used as delimiters when the extract is split into speeches.
senators = ['Mr. SESSIONS', 'Mr. STRANGE', 'Mr. JONES', 'Mr. SHELBY', 'Mr. SULLIVAN', 'Ms. MURKOWSKI',
            'Mr. FLAKE', 'Mr. MCCAIN', 'Mr. KYL', 'Mr. COTTON', 'Mr. BOOZMAN', 'Mrs. FEINSTEIN', 'Ms. HARRIS',
            'Mr. GARDNER', 'Mr. BENNET', 'Mr. MURPHY', 'Mr. BLUMENTHAL', 'Mr. CARPER', 'Mr. COONS', 'Mr. NELSON', 'Mr. RUBIO',
            'Mr. PERDUE', 'Mr. ISAKSON', 'Ms. HIRONO', 'Mr. SCHATZ', 'Mr. RISCH', 'Mr. CRAPO', 'Mr. DURBIN',
            'Ms. DUCKWORTH', 'Mr. DONNELLY', 'Mr. YOUNG', 'Mrs. ERNST', 'Mr. GRASSLEY', 'Mr. ROBERTS', 'Mr. MORAN',
            'Mr. MCCONNELL', 'Mr. PAUL', 'Mr. CASSIDY', 'Mr. KENNEDY', 'Mr. KING', 'Ms. COLLINS', 'Mr. CARDIN',
            'Mr. VAN HOLLEN', 'Ms. WARREN', 'Mr. MARKEY', 'Ms. STABENOW', 'Mr. PETERS', 'Ms. KLOBUCHAR',
            'Mr. FRANKEN', 'Ms. SMITH', 'Mr. WICKER', 'Mr. COCHRAN', 'Mrs. HYDE-SMITH', 'Mrs. MCCASKILL',
            'Mr. BLUNT', 'Mr. TESTER', 'Mr. DAINES', 'Mrs. FISCHER', 'Mr. SASSE', 'Mr. HELLER', 'Ms. CORTEZ MASTO',
            'Mrs. SHAHEEN', 'Ms. HASSAN', 'Mr. MENENDEZ', 'Mr. BOOKER', 'Mr. HEINRICH', 'Mr. UDALL',
            'Mrs. GILLIBRAND', 'Mr. SCHUMER', 'Mr. TILLIS', 'Mr. BURR', 'Ms. HEITKAMP', 'Mr. HOEVEN',
            'Mr. BROWN', 'Mr. PORTMAN', 'Mr. INHOFE', 'Mr. LANKFORD', 'Mr. MERKLEY', 'Mr. WYDEN',
            'Mr. CASEY', 'Mr. TOOMEY', 'Mr. WHITEHOUSE', 'Mr. REED', 'Mr. GRAHAM',
            'Mr. SCOTT', 'Mr. ROUNDS', 'Mr. THUNE', 'Mr. CORKER', 'Mr. ALEXANDER', 'Mr. CRUZ',
            'Mr. CORNYN', 'Mr. HATCH', 'Mr. LEE', 'Mr. SANDERS', 'Mr. LEAHY', 'Mr. KAINE',
            'Mr. WARNER', 'Ms. CANTWELL', 'Mrs. MURRAY', 'Mr. MANCHIN', 'Mrs. CAPITO',
            'Ms. BALDWIN', 'Mr. JOHNSON', 'Mr. BARRASSO', 'Mr. ENZI',
            # A comma was missing after 'The VICE PRESIDENT', which silently
            # fused it with the next label into a single string.
            'The ACTING PRESIDENT', 'The PRESIDING OFFICER', 'The VICE PRESIDENT',
            'Executive nominations confirmed by']
# Date range to process (inclusive), as YYYYMMDD.
startDate = '20170106'
endDate = '20170130'

# Download the records, clean them, then build the export tables.
conRecords = downloadConRecords(startDate, endDate)
cleanConRecords = cleanRecords(conRecords, phraseDelete, segmentStart, segmentEnd)
(textExport, textDump, speechExport,
 speakerExport, speakerAnalytics) = writeAnalytics(conRecords, senators)

# One CSV file per table, named after the table.
for exportName, exportRows in (
        ('textExport', textExport),
        ('textDump', textDump),
        ('speechExport', speechExport),
        ('speakerExport', speakerExport),
        ('speakerAnalytics', speakerAnalytics)):
    exportCSV(exportRows, exportName)
python beginner
New contributor
$endgroup$
add a comment |
$begingroup$
This is built with Python 2.7.15.
The goal of this script is to count the number of words spoken by each Senator on the floor of Congress between given dates.
It pulls from the Congressional Record, cuts out the sections not spoken on the floor (e.g., lists of amendments), and returns a count of words by senator. The current list of senators is from the 115th Congress (2017–2018).
# -*- coding: utf-8 -*-
from requests import get # to make GET request
import time
from datetime import date, timedelta
import os.path
import textract
import re
import csv
import sys
#download URLs
def downloadPDF(url, file_name,
                save_path='/Users/One/Document/Workspace/Projects/Senate/Congressional_Record/'):
    """Fetch ``url`` and save the response as ``file_name`` if it is a PDF.

    url       -- address requested with a single GET.
    file_name -- name of the file created inside ``save_path``.
    save_path -- target directory; defaults to the folder the script always used.

    Returns 'SUCCESS' when a PDF was written, otherwise None (nothing is saved).
    """
    response = get(url)
    # The header may be absent or carry parameters such as
    # "application/pdf; charset=binary", so compare only the media type.
    content_type = response.headers.get('content-type') or ''
    if content_type.split(';')[0].strip().lower() != 'application/pdf':
        return None
    path = os.path.join(save_path, file_name)
    # binary mode: the payload is raw PDF bytes
    with open(path, "wb") as pdf_file:
        pdf_file.write(response.content)
    return 'SUCCESS'
#download congressional records and extract text
def downloadConRecords(startDate, endDate, delay=20):  # dates as YYYYMMDD
    """Download the Senate PDF for each day in a date range and extract its text.

    startDate, endDate -- first and last day (inclusive), as 'YYYYMMDD'
                          strings or ints.
    delay              -- seconds to sleep before every request (default 20).

    Returns a list of ``[url, 'YYYYMMDD', extractedText]`` entries, one per
    day for which downloadPDF reported 'SUCCESS'. An end date earlier than
    the start date yields an empty list.
    """
    # Same folder downloadPDF writes to by default.
    recordDir = '/Users/One/Document/Workspace/Projects/Senate/Congressional_Record/'
    startDate = str(startDate)
    endDate = str(endDate)
    firstDay = date(int(startDate[0:4]), int(startDate[4:6]), int(startDate[6:8]))
    lastDay = date(int(endDate[0:4]), int(endDate[4:6]), int(endDate[6:8]))

    congressionalRecords = []
    # The builtin range is used here; the old local named "range" shadowed it.
    for offset in range((lastDay - firstDay).days + 1):
        iso = (firstDay + timedelta(days=offset)).isoformat()  # 'YYYY-MM-DD'
        YYYY, MM, DD = iso.split('-')
        url = ('https://www.congress.gov/crec/' + YYYY + '/' + MM + '/' + DD +
               '/CREC-' + iso + '-senate.pdf')
        filename = YYYY + MM + DD + ".pdf"
        # pause before each request, including the first one
        time.sleep(delay)
        if downloadPDF(url, filename) == "SUCCESS":
            print(url)
            textExtract = textract.process(recordDir + filename)
            # entry: source url, date stamp (filename without ".pdf"), raw text
            congressionalRecords.append([url, YYYY + MM + DD, textExtract])
    return congressionalRecords
#process the congressional records
def cleanRecords(congressionalRecords, phraseDelete, segmentStart, segmentEnd):
    """Strip newlines, boilerplate phrases and unwanted sections from each record.

    congressionalRecords -- list of records whose element [2] is the raw text.
    phraseDelete         -- substrings removed wherever they occur.
    segmentStart         -- openings of sections to cut out.
    segmentEnd           -- text that closes such a section.

    Start and end strings are interpolated into a regular expression as-is
    (not escaped). Each record is extended in place with the cleaned text and
    a list holding, per segment start, the passages that were cut. The same
    list object is returned.
    """
    # Build each "start ... end" pattern once instead of twice per record.
    patterns = [re.compile('{}.*?{}'.format(start, segmentEnd))
                for start in segmentStart]
    for record in congressionalRecords:
        raw = record[2]
        # On Python 3 the extract may arrive as bytes; on Python 2 bytes is str
        # and this branch is skipped.
        # NOTE(review): assumes the extract is UTF-8 encoded - confirm.
        if isinstance(raw, bytes) and not isinstance(raw, str):
            raw = raw.decode('utf-8')
        clean = raw.replace('\n', ' ')
        for phrase in phraseDelete:
            clean = clean.replace(phrase, '')
        dump = []
        for pattern in patterns:
            # keep what is removed so it can be inspected later
            dump.append(pattern.findall(clean))
            clean = pattern.sub(' ', clean)
        record.append(clean)
        record.append(dump)
    return congressionalRecords
#split the text into sections based on the speaker
def multiDelimStringSplitter(aString, separators):
    """Split ``aString`` into speeches, one per occurrence of a separator.

    aString    -- text to split.
    separators -- list of speaker labels used as delimiters; it is not modified.

    Returns a list of ``[separator, text, wordCount]`` rows, where ``text`` is
    everything up to the next separator and ``wordCount`` is the number of
    whitespace-separated words left after punctuation is removed, as a string.
    Text before the first separator is discarded, and a separator followed by
    no text at all produces no row.
    """
    # Longest first, so a label that is a prefix of another cannot win.
    # Empty strings are ignored; they can never delimit anything.
    ordered = sorted((s for s in separators if s), key=len, reverse=True)
    if not ordered:
        return []
    pattern = re.compile('(' + '|'.join(re.escape(s) for s in ordered) + ')')
    # The capturing group keeps the separators in the result:
    # [preamble, sep, text, sep, text, ...] - always an odd number of pieces.
    pieces = pattern.split(aString)
    rows = []
    for k in range(1, len(pieces) - 1, 2):
        speaker = pieces[k]
        text = pieces[k + 1]
        if not text:
            continue
        # strip punctuation so that it is not counted as part of a word
        stripped = re.sub(r'[^\w\s]', '', text)
        rows.append([speaker, text, str(len(stripped.split()))])
    return rows
#get information on the records, speakers and word counts
def writeAnalytics(congressionalRecords, senators):
    """Build the five export tables from the cleaned records.

    congressionalRecords -- records of the form
                            [url, date, rawText, cleanText, dump].
    senators             -- speaker labels used to split the clean text.

    Each record is extended in place with its list of speeches and with a flat
    ``[speaker, count, speaker, count, ...]`` list of that day's word counts.

    Returns ``(textExport, textDump, speechExport, speakerExport,
    speakerAnalytics)``; every table is a list of rows whose first row is the
    header. With no records, the date columns of speakerAnalytics are ''.
    Raises ValueError if a speech is attributed to a label not in ``senators``.
    """
    textExport = [['url', 'date', 'extractRaw', 'extractClean']]
    textDump = [['date', 'extractDump']]
    speechExport = [['date', 'speaker', 'text', 'wordCount']]
    speakerExport = [['date', 'speaker', 'wordCount']]
    speakerAnalytics = [['startDate', 'endDate', 'speaker', 'wordCount']]

    # running totals over the whole date range
    perSpeakerTotal = dict.fromkeys(senators, 0)
    recordDates = []

    for record in congressionalRecords:
        url, day, rawText, cleanText, dump = (
            record[0], record[1], record[2], record[3], record[4])

        # split the clean extract into one row per speech
        speeches = multiDelimStringSplitter(cleanText, senators)
        record.append(speeches)

        # export only the segment dumps that actually removed something
        for erased in dump:
            if len(erased) > 0:
                textDump.append([day, erased])

        # word counts for this day only
        wordsPerSpeaker = dict.fromkeys(senators, 0)
        for speech in speeches:
            speaker, text, wordCount = speech[0], speech[1], speech[2]
            speechExport.append([day, speaker, text, wordCount])
            if speaker not in wordsPerSpeaker:
                raise ValueError('%r is not in the list of speakers' % (speaker,))
            wordsPerSpeaker[speaker] += int(wordCount)

        # keep the flat per-day list on the record, as before
        flatCounts = []
        for name in senators:
            flatCounts += [name, wordsPerSpeaker[name]]
        record.append(flatCounts)

        textExport.append([url, day, rawText, cleanText])
        recordDates.append(day)

        for name in senators:
            speakerExport.append([day, name, wordsPerSpeaker[name]])
            perSpeakerTotal[name] += wordsPerSpeaker[name]

    # min()/max() fail on an empty list, e.g. when nothing was downloaded
    firstDate = min(recordDates) if recordDates else ''
    lastDate = max(recordDates) if recordDates else ''
    for name in senators:
        speakerAnalytics.append([firstDate, lastDate, name, perSpeakerTotal[name]])

    return textExport, textDump, speechExport, speakerExport, speakerAnalytics
#export to CSV
def exportCSV(flatList, filename):
    """Write the rows of ``flatList`` to ``filename`` + '.csv'.

    flatList -- iterable of rows, each row an iterable of cell values.
    filename -- output path without the '.csv' extension.
    """
    path = filename + ".csv"
    # csv.writer needs a binary file on Python 2 but a text file opened with
    # newline='' on Python 3.
    if sys.version_info[0] < 3:
        handle = open(path, "wb")
    else:
        handle = open(path, "w", newline="", encoding="utf-8")
    with handle as f:
        writer = csv.writer(f)
        writer.writerows(flatList)
# Text removed verbatim from every extract before it is split into speeches.
phraseDelete = [
    ",",
    "This ‘‘bullet’’ symbol identifies statements or insertions which are not spoken by a Member of the Senate on the floor.",
]

# Headings that open sections to be cut out of the extract. Each heading is
# matched with one space on either side, so the padding is added here once.
segmentStart = [" " + heading + " " for heading in (
    "PRAYER",
    "PLEDGE OF ALLEGIANCE",
    "APPOINTMENT OF ACTING PRESIDENT PRO TEMPORE",
    "CERTIFICATES OF ELECTION",
    "MESSAGE FROM THE HOUSE",
    "MESSAGES FROM THE PRESIDENT",
    "EXECUTIVE MESSAGES REFERRED",
    "LIST OF SENATORS BY STATES",
    "PRESIDENTIAL MESSAGES",
    "ENROLLED BILLS PRESENTED",
    "MEASURES REFERRED",
    "EXECUTIVE AND OTHER COMMUNICATIONS",
    "REPORTS OF COMMITTEES",
    "EXECUTIVE REPORTS OF COMMITTEES",
    "AMENDMENTS SUBMITTED AND PROPOSED",
    "INTRODUCTION OF BILLS AND JOINT RESOLUTIONS",
    "ADDITIONAL COSPONSORS",
    "SUBMITTED RESOLUTIONS",
    "SUBMISSION OF CONCURRENT AND SENATE RESOLUTIONS",
    "SENATE RESOLUTION",
    "TEXT OF AMENDMENTS",
    "APPOINTMENT",
    "AUTHORITY FOR COMMITTEES TO MEET",
    "CONFIRMATION",
    "NOMINATION",
)]

# Text that closes a removable section.
# NOTE(review): a backslash may have been lost here (e.g. ' \f ') - confirm
# against the original script.
segmentEnd = " f "
# Speaker labels used as delimiters when the extract is split into speeches.
senators = ['Mr. SESSIONS', 'Mr. STRANGE', 'Mr. JONES', 'Mr. SHELBY', 'Mr. SULLIVAN', 'Ms. MURKOWSKI',
            'Mr. FLAKE', 'Mr. MCCAIN', 'Mr. KYL', 'Mr. COTTON', 'Mr. BOOZMAN', 'Mrs. FEINSTEIN', 'Ms. HARRIS',
            'Mr. GARDNER', 'Mr. BENNET', 'Mr. MURPHY', 'Mr. BLUMENTHAL', 'Mr. CARPER', 'Mr. COONS', 'Mr. NELSON', 'Mr. RUBIO',
            'Mr. PERDUE', 'Mr. ISAKSON', 'Ms. HIRONO', 'Mr. SCHATZ', 'Mr. RISCH', 'Mr. CRAPO', 'Mr. DURBIN',
            'Ms. DUCKWORTH', 'Mr. DONNELLY', 'Mr. YOUNG', 'Mrs. ERNST', 'Mr. GRASSLEY', 'Mr. ROBERTS', 'Mr. MORAN',
            'Mr. MCCONNELL', 'Mr. PAUL', 'Mr. CASSIDY', 'Mr. KENNEDY', 'Mr. KING', 'Ms. COLLINS', 'Mr. CARDIN',
            'Mr. VAN HOLLEN', 'Ms. WARREN', 'Mr. MARKEY', 'Ms. STABENOW', 'Mr. PETERS', 'Ms. KLOBUCHAR',
            'Mr. FRANKEN', 'Ms. SMITH', 'Mr. WICKER', 'Mr. COCHRAN', 'Mrs. HYDE-SMITH', 'Mrs. MCCASKILL',
            'Mr. BLUNT', 'Mr. TESTER', 'Mr. DAINES', 'Mrs. FISCHER', 'Mr. SASSE', 'Mr. HELLER', 'Ms. CORTEZ MASTO',
            'Mrs. SHAHEEN', 'Ms. HASSAN', 'Mr. MENENDEZ', 'Mr. BOOKER', 'Mr. HEINRICH', 'Mr. UDALL',
            'Mrs. GILLIBRAND', 'Mr. SCHUMER', 'Mr. TILLIS', 'Mr. BURR', 'Ms. HEITKAMP', 'Mr. HOEVEN',
            'Mr. BROWN', 'Mr. PORTMAN', 'Mr. INHOFE', 'Mr. LANKFORD', 'Mr. MERKLEY', 'Mr. WYDEN',
            'Mr. CASEY', 'Mr. TOOMEY', 'Mr. WHITEHOUSE', 'Mr. REED', 'Mr. GRAHAM',
            'Mr. SCOTT', 'Mr. ROUNDS', 'Mr. THUNE', 'Mr. CORKER', 'Mr. ALEXANDER', 'Mr. CRUZ',
            'Mr. CORNYN', 'Mr. HATCH', 'Mr. LEE', 'Mr. SANDERS', 'Mr. LEAHY', 'Mr. KAINE',
            'Mr. WARNER', 'Ms. CANTWELL', 'Mrs. MURRAY', 'Mr. MANCHIN', 'Mrs. CAPITO',
            'Ms. BALDWIN', 'Mr. JOHNSON', 'Mr. BARRASSO', 'Mr. ENZI',
            # A comma was missing after 'The VICE PRESIDENT', which silently
            # fused it with the next label into a single string.
            'The ACTING PRESIDENT', 'The PRESIDING OFFICER', 'The VICE PRESIDENT',
            'Executive nominations confirmed by']
# Date range to process (inclusive), as YYYYMMDD.
startDate = '20170106'
endDate = '20170130'

# Download the records, clean them, then build the export tables.
conRecords = downloadConRecords(startDate, endDate)
cleanConRecords = cleanRecords(conRecords, phraseDelete, segmentStart, segmentEnd)
(textExport, textDump, speechExport,
 speakerExport, speakerAnalytics) = writeAnalytics(conRecords, senators)

# One CSV file per table, named after the table.
for exportName, exportRows in (
        ('textExport', textExport),
        ('textDump', textDump),
        ('speechExport', speechExport),
        ('speakerExport', speakerExport),
        ('speakerAnalytics', speakerAnalytics)):
    exportCSV(exportRows, exportName)
python beginner
New contributor
$endgroup$
add a comment |
$begingroup$
This is built with Python 2.7.15.
The goal of this script is to count the number of words spoken by each Senator on the floor of Congress between given dates.
It pulls from the Congressional Record, cuts out the sections not spoken on the floor (e.g., lists of amendments), and returns a count of words by senator. The current list of senators is from the 115th Congress (2017–2018).
# -*- coding: utf-8 -*-
from requests import get # to make GET request
import time
from datetime import date, timedelta
import os.path
import textract
import re
import csv
import sys
#download URLs
def downloadPDF(url, file_name,
                save_path='/Users/One/Document/Workspace/Projects/Senate/Congressional_Record/'):
    """Fetch ``url`` and save the response as ``file_name`` if it is a PDF.

    url       -- address requested with a single GET.
    file_name -- name of the file created inside ``save_path``.
    save_path -- target directory; defaults to the folder the script always used.

    Returns 'SUCCESS' when a PDF was written, otherwise None (nothing is saved).
    """
    response = get(url)
    # The header may be absent or carry parameters such as
    # "application/pdf; charset=binary", so compare only the media type.
    content_type = response.headers.get('content-type') or ''
    if content_type.split(';')[0].strip().lower() != 'application/pdf':
        return None
    path = os.path.join(save_path, file_name)
    # binary mode: the payload is raw PDF bytes
    with open(path, "wb") as pdf_file:
        pdf_file.write(response.content)
    return 'SUCCESS'
#download congressional records and extract text
def downloadConRecords(startDate, endDate, delay=20):  # dates as YYYYMMDD
    """Download the Senate PDF for each day in a date range and extract its text.

    startDate, endDate -- first and last day (inclusive), as 'YYYYMMDD'
                          strings or ints.
    delay              -- seconds to sleep before every request (default 20).

    Returns a list of ``[url, 'YYYYMMDD', extractedText]`` entries, one per
    day for which downloadPDF reported 'SUCCESS'. An end date earlier than
    the start date yields an empty list.
    """
    # Same folder downloadPDF writes to by default.
    recordDir = '/Users/One/Document/Workspace/Projects/Senate/Congressional_Record/'
    startDate = str(startDate)
    endDate = str(endDate)
    firstDay = date(int(startDate[0:4]), int(startDate[4:6]), int(startDate[6:8]))
    lastDay = date(int(endDate[0:4]), int(endDate[4:6]), int(endDate[6:8]))

    congressionalRecords = []
    # The builtin range is used here; the old local named "range" shadowed it.
    for offset in range((lastDay - firstDay).days + 1):
        iso = (firstDay + timedelta(days=offset)).isoformat()  # 'YYYY-MM-DD'
        YYYY, MM, DD = iso.split('-')
        url = ('https://www.congress.gov/crec/' + YYYY + '/' + MM + '/' + DD +
               '/CREC-' + iso + '-senate.pdf')
        filename = YYYY + MM + DD + ".pdf"
        # pause before each request, including the first one
        time.sleep(delay)
        if downloadPDF(url, filename) == "SUCCESS":
            print(url)
            textExtract = textract.process(recordDir + filename)
            # entry: source url, date stamp (filename without ".pdf"), raw text
            congressionalRecords.append([url, YYYY + MM + DD, textExtract])
    return congressionalRecords
#process the congressional records
def cleanRecords(congressionalRecords, phraseDelete, segmentStart, segmentEnd):
    """Strip newlines, boilerplate phrases and unwanted sections from each record.

    congressionalRecords -- list of records whose element [2] is the raw text.
    phraseDelete         -- substrings removed wherever they occur.
    segmentStart         -- openings of sections to cut out.
    segmentEnd           -- text that closes such a section.

    Start and end strings are interpolated into a regular expression as-is
    (not escaped). Each record is extended in place with the cleaned text and
    a list holding, per segment start, the passages that were cut. The same
    list object is returned.
    """
    # Build each "start ... end" pattern once instead of twice per record.
    patterns = [re.compile('{}.*?{}'.format(start, segmentEnd))
                for start in segmentStart]
    for record in congressionalRecords:
        raw = record[2]
        # On Python 3 the extract may arrive as bytes; on Python 2 bytes is str
        # and this branch is skipped.
        # NOTE(review): assumes the extract is UTF-8 encoded - confirm.
        if isinstance(raw, bytes) and not isinstance(raw, str):
            raw = raw.decode('utf-8')
        clean = raw.replace('\n', ' ')
        for phrase in phraseDelete:
            clean = clean.replace(phrase, '')
        dump = []
        for pattern in patterns:
            # keep what is removed so it can be inspected later
            dump.append(pattern.findall(clean))
            clean = pattern.sub(' ', clean)
        record.append(clean)
        record.append(dump)
    return congressionalRecords
#split the text into sections based on the speaker
def multiDelimStringSplitter(aString, separators):
    """Split ``aString`` into speeches, one per occurrence of a separator.

    aString    -- text to split.
    separators -- list of speaker labels used as delimiters; it is not modified.

    Returns a list of ``[separator, text, wordCount]`` rows, where ``text`` is
    everything up to the next separator and ``wordCount`` is the number of
    whitespace-separated words left after punctuation is removed, as a string.
    Text before the first separator is discarded, and a separator followed by
    no text at all produces no row.
    """
    # Longest first, so a label that is a prefix of another cannot win.
    # Empty strings are ignored; they can never delimit anything.
    ordered = sorted((s for s in separators if s), key=len, reverse=True)
    if not ordered:
        return []
    pattern = re.compile('(' + '|'.join(re.escape(s) for s in ordered) + ')')
    # The capturing group keeps the separators in the result:
    # [preamble, sep, text, sep, text, ...] - always an odd number of pieces.
    pieces = pattern.split(aString)
    rows = []
    for k in range(1, len(pieces) - 1, 2):
        speaker = pieces[k]
        text = pieces[k + 1]
        if not text:
            continue
        # strip punctuation so that it is not counted as part of a word
        stripped = re.sub(r'[^\w\s]', '', text)
        rows.append([speaker, text, str(len(stripped.split()))])
    return rows
#get information on the records, speakers and word counts
def writeAnalytics(congressionalRecords, senators):
    """Build the five export tables from the cleaned records.

    congressionalRecords -- records of the form
                            [url, date, rawText, cleanText, dump].
    senators             -- speaker labels used to split the clean text.

    Each record is extended in place with its list of speeches and with a flat
    ``[speaker, count, speaker, count, ...]`` list of that day's word counts.

    Returns ``(textExport, textDump, speechExport, speakerExport,
    speakerAnalytics)``; every table is a list of rows whose first row is the
    header. With no records, the date columns of speakerAnalytics are ''.
    Raises ValueError if a speech is attributed to a label not in ``senators``.
    """
    textExport = [['url', 'date', 'extractRaw', 'extractClean']]
    textDump = [['date', 'extractDump']]
    speechExport = [['date', 'speaker', 'text', 'wordCount']]
    speakerExport = [['date', 'speaker', 'wordCount']]
    speakerAnalytics = [['startDate', 'endDate', 'speaker', 'wordCount']]

    # running totals over the whole date range
    perSpeakerTotal = dict.fromkeys(senators, 0)
    recordDates = []

    for record in congressionalRecords:
        url, day, rawText, cleanText, dump = (
            record[0], record[1], record[2], record[3], record[4])

        # split the clean extract into one row per speech
        speeches = multiDelimStringSplitter(cleanText, senators)
        record.append(speeches)

        # export only the segment dumps that actually removed something
        for erased in dump:
            if len(erased) > 0:
                textDump.append([day, erased])

        # word counts for this day only
        wordsPerSpeaker = dict.fromkeys(senators, 0)
        for speech in speeches:
            speaker, text, wordCount = speech[0], speech[1], speech[2]
            speechExport.append([day, speaker, text, wordCount])
            if speaker not in wordsPerSpeaker:
                raise ValueError('%r is not in the list of speakers' % (speaker,))
            wordsPerSpeaker[speaker] += int(wordCount)

        # keep the flat per-day list on the record, as before
        flatCounts = []
        for name in senators:
            flatCounts += [name, wordsPerSpeaker[name]]
        record.append(flatCounts)

        textExport.append([url, day, rawText, cleanText])
        recordDates.append(day)

        for name in senators:
            speakerExport.append([day, name, wordsPerSpeaker[name]])
            perSpeakerTotal[name] += wordsPerSpeaker[name]

    # min()/max() fail on an empty list, e.g. when nothing was downloaded
    firstDate = min(recordDates) if recordDates else ''
    lastDate = max(recordDates) if recordDates else ''
    for name in senators:
        speakerAnalytics.append([firstDate, lastDate, name, perSpeakerTotal[name]])

    return textExport, textDump, speechExport, speakerExport, speakerAnalytics
#export to CSV
def exportCSV(flatList, filename):
    """Write the rows of ``flatList`` to ``filename`` + '.csv'.

    flatList -- iterable of rows, each row an iterable of cell values.
    filename -- output path without the '.csv' extension.
    """
    path = filename + ".csv"
    # csv.writer needs a binary file on Python 2 but a text file opened with
    # newline='' on Python 3.
    if sys.version_info[0] < 3:
        handle = open(path, "wb")
    else:
        handle = open(path, "w", newline="", encoding="utf-8")
    with handle as f:
        writer = csv.writer(f)
        writer.writerows(flatList)
# Text removed verbatim from every extract before it is split into speeches.
phraseDelete = [
    ",",
    "This ‘‘bullet’’ symbol identifies statements or insertions which are not spoken by a Member of the Senate on the floor.",
]

# Headings that open sections to be cut out of the extract. Each heading is
# matched with one space on either side, so the padding is added here once.
segmentStart = [" " + heading + " " for heading in (
    "PRAYER",
    "PLEDGE OF ALLEGIANCE",
    "APPOINTMENT OF ACTING PRESIDENT PRO TEMPORE",
    "CERTIFICATES OF ELECTION",
    "MESSAGE FROM THE HOUSE",
    "MESSAGES FROM THE PRESIDENT",
    "EXECUTIVE MESSAGES REFERRED",
    "LIST OF SENATORS BY STATES",
    "PRESIDENTIAL MESSAGES",
    "ENROLLED BILLS PRESENTED",
    "MEASURES REFERRED",
    "EXECUTIVE AND OTHER COMMUNICATIONS",
    "REPORTS OF COMMITTEES",
    "EXECUTIVE REPORTS OF COMMITTEES",
    "AMENDMENTS SUBMITTED AND PROPOSED",
    "INTRODUCTION OF BILLS AND JOINT RESOLUTIONS",
    "ADDITIONAL COSPONSORS",
    "SUBMITTED RESOLUTIONS",
    "SUBMISSION OF CONCURRENT AND SENATE RESOLUTIONS",
    "SENATE RESOLUTION",
    "TEXT OF AMENDMENTS",
    "APPOINTMENT",
    "AUTHORITY FOR COMMITTEES TO MEET",
    "CONFIRMATION",
    "NOMINATION",
)]

# Text that closes a removable section.
# NOTE(review): a backslash may have been lost here (e.g. ' \f ') - confirm
# against the original script.
segmentEnd = " f "
# Speaker labels used as delimiters when the extract is split into speeches.
senators = ['Mr. SESSIONS', 'Mr. STRANGE', 'Mr. JONES', 'Mr. SHELBY', 'Mr. SULLIVAN', 'Ms. MURKOWSKI',
            'Mr. FLAKE', 'Mr. MCCAIN', 'Mr. KYL', 'Mr. COTTON', 'Mr. BOOZMAN', 'Mrs. FEINSTEIN', 'Ms. HARRIS',
            'Mr. GARDNER', 'Mr. BENNET', 'Mr. MURPHY', 'Mr. BLUMENTHAL', 'Mr. CARPER', 'Mr. COONS', 'Mr. NELSON', 'Mr. RUBIO',
            'Mr. PERDUE', 'Mr. ISAKSON', 'Ms. HIRONO', 'Mr. SCHATZ', 'Mr. RISCH', 'Mr. CRAPO', 'Mr. DURBIN',
            'Ms. DUCKWORTH', 'Mr. DONNELLY', 'Mr. YOUNG', 'Mrs. ERNST', 'Mr. GRASSLEY', 'Mr. ROBERTS', 'Mr. MORAN',
            'Mr. MCCONNELL', 'Mr. PAUL', 'Mr. CASSIDY', 'Mr. KENNEDY', 'Mr. KING', 'Ms. COLLINS', 'Mr. CARDIN',
            'Mr. VAN HOLLEN', 'Ms. WARREN', 'Mr. MARKEY', 'Ms. STABENOW', 'Mr. PETERS', 'Ms. KLOBUCHAR',
            'Mr. FRANKEN', 'Ms. SMITH', 'Mr. WICKER', 'Mr. COCHRAN', 'Mrs. HYDE-SMITH', 'Mrs. MCCASKILL',
            'Mr. BLUNT', 'Mr. TESTER', 'Mr. DAINES', 'Mrs. FISCHER', 'Mr. SASSE', 'Mr. HELLER', 'Ms. CORTEZ MASTO',
            'Mrs. SHAHEEN', 'Ms. HASSAN', 'Mr. MENENDEZ', 'Mr. BOOKER', 'Mr. HEINRICH', 'Mr. UDALL',
            'Mrs. GILLIBRAND', 'Mr. SCHUMER', 'Mr. TILLIS', 'Mr. BURR', 'Ms. HEITKAMP', 'Mr. HOEVEN',
            'Mr. BROWN', 'Mr. PORTMAN', 'Mr. INHOFE', 'Mr. LANKFORD', 'Mr. MERKLEY', 'Mr. WYDEN',
            'Mr. CASEY', 'Mr. TOOMEY', 'Mr. WHITEHOUSE', 'Mr. REED', 'Mr. GRAHAM',
            'Mr. SCOTT', 'Mr. ROUNDS', 'Mr. THUNE', 'Mr. CORKER', 'Mr. ALEXANDER', 'Mr. CRUZ',
            'Mr. CORNYN', 'Mr. HATCH', 'Mr. LEE', 'Mr. SANDERS', 'Mr. LEAHY', 'Mr. KAINE',
            'Mr. WARNER', 'Ms. CANTWELL', 'Mrs. MURRAY', 'Mr. MANCHIN', 'Mrs. CAPITO',
            'Ms. BALDWIN', 'Mr. JOHNSON', 'Mr. BARRASSO', 'Mr. ENZI',
            # A comma was missing after 'The VICE PRESIDENT', which silently
            # fused it with the next label into a single string.
            'The ACTING PRESIDENT', 'The PRESIDING OFFICER', 'The VICE PRESIDENT',
            'Executive nominations confirmed by']
# Date range to process (inclusive), as YYYYMMDD.
startDate = '20170106'
endDate = '20170130'

# Download the records, clean them, then build the export tables.
conRecords = downloadConRecords(startDate, endDate)
cleanConRecords = cleanRecords(conRecords, phraseDelete, segmentStart, segmentEnd)
(textExport, textDump, speechExport,
 speakerExport, speakerAnalytics) = writeAnalytics(conRecords, senators)

# One CSV file per table, named after the table.
for exportName, exportRows in (
        ('textExport', textExport),
        ('textDump', textDump),
        ('speechExport', speechExport),
        ('speakerExport', speakerExport),
        ('speakerAnalytics', speakerAnalytics)):
    exportCSV(exportRows, exportName)
python beginner
New contributor
$endgroup$
This is built with Python 2.7.15.
The goal of this script is to count the number of words spoken by each Senator on the floor of Congress between given dates.
It pulls from the Congressional Record, cuts out the sections not spoken on the floor (e.g., lists of amendments), and returns a count of words by senator. The current list of senators is from the 115th Congress (2017–2018).
# -*- coding: utf-8 -*-
from requests import get # to make GET request
import time
from datetime import date, timedelta
import os.path
import textract
import re
import csv
import sys
#download URLs
def downloadPDF(url, file_name,
                save_path='/Users/One/Document/Workspace/Projects/Senate/Congressional_Record/'):
    """Fetch ``url`` and save the response as ``file_name`` if it is a PDF.

    url       -- address requested with a single GET.
    file_name -- name of the file created inside ``save_path``.
    save_path -- target directory; defaults to the folder the script always used.

    Returns 'SUCCESS' when a PDF was written, otherwise None (nothing is saved).
    """
    response = get(url)
    # The header may be absent or carry parameters such as
    # "application/pdf; charset=binary", so compare only the media type.
    content_type = response.headers.get('content-type') or ''
    if content_type.split(';')[0].strip().lower() != 'application/pdf':
        return None
    path = os.path.join(save_path, file_name)
    # binary mode: the payload is raw PDF bytes
    with open(path, "wb") as pdf_file:
        pdf_file.write(response.content)
    return 'SUCCESS'
#download congressional records and extract text
def downloadConRecords(startDate, endDate, delay=20):  # dates as YYYYMMDD
    """Download the Senate PDF for each day in a date range and extract its text.

    startDate, endDate -- first and last day (inclusive), as 'YYYYMMDD'
                          strings or ints.
    delay              -- seconds to sleep before every request (default 20).

    Returns a list of ``[url, 'YYYYMMDD', extractedText]`` entries, one per
    day for which downloadPDF reported 'SUCCESS'. An end date earlier than
    the start date yields an empty list.
    """
    # Same folder downloadPDF writes to by default.
    recordDir = '/Users/One/Document/Workspace/Projects/Senate/Congressional_Record/'
    startDate = str(startDate)
    endDate = str(endDate)
    firstDay = date(int(startDate[0:4]), int(startDate[4:6]), int(startDate[6:8]))
    lastDay = date(int(endDate[0:4]), int(endDate[4:6]), int(endDate[6:8]))

    congressionalRecords = []
    # The builtin range is used here; the old local named "range" shadowed it.
    for offset in range((lastDay - firstDay).days + 1):
        iso = (firstDay + timedelta(days=offset)).isoformat()  # 'YYYY-MM-DD'
        YYYY, MM, DD = iso.split('-')
        url = ('https://www.congress.gov/crec/' + YYYY + '/' + MM + '/' + DD +
               '/CREC-' + iso + '-senate.pdf')
        filename = YYYY + MM + DD + ".pdf"
        # pause before each request, including the first one
        time.sleep(delay)
        if downloadPDF(url, filename) == "SUCCESS":
            print(url)
            textExtract = textract.process(recordDir + filename)
            # entry: source url, date stamp (filename without ".pdf"), raw text
            congressionalRecords.append([url, YYYY + MM + DD, textExtract])
    return congressionalRecords
#process the congressional records
def cleanRecords(congressionalRecords, phraseDelete, segmentStart, segmentEnd):
    """Strip newlines, boilerplate phrases and unwanted sections from each record.

    congressionalRecords -- list of records whose element [2] is the raw text.
    phraseDelete         -- substrings removed wherever they occur.
    segmentStart         -- openings of sections to cut out.
    segmentEnd           -- text that closes such a section.

    Start and end strings are interpolated into a regular expression as-is
    (not escaped). Each record is extended in place with the cleaned text and
    a list holding, per segment start, the passages that were cut. The same
    list object is returned.
    """
    # Build each "start ... end" pattern once instead of twice per record.
    patterns = [re.compile('{}.*?{}'.format(start, segmentEnd))
                for start in segmentStart]
    for record in congressionalRecords:
        raw = record[2]
        # On Python 3 the extract may arrive as bytes; on Python 2 bytes is str
        # and this branch is skipped.
        # NOTE(review): assumes the extract is UTF-8 encoded - confirm.
        if isinstance(raw, bytes) and not isinstance(raw, str):
            raw = raw.decode('utf-8')
        clean = raw.replace('\n', ' ')
        for phrase in phraseDelete:
            clean = clean.replace(phrase, '')
        dump = []
        for pattern in patterns:
            # keep what is removed so it can be inspected later
            dump.append(pattern.findall(clean))
            clean = pattern.sub(' ', clean)
        record.append(clean)
        record.append(dump)
    return congressionalRecords
#split the text into sections based on the speaker
def multiDelimStringSplitter(aString, separators):
    """Split ``aString`` into speeches, one per occurrence of a separator.

    aString    -- text to split.
    separators -- list of speaker labels used as delimiters; it is not modified.

    Returns a list of ``[separator, text, wordCount]`` rows, where ``text`` is
    everything up to the next separator and ``wordCount`` is the number of
    whitespace-separated words left after punctuation is removed, as a string.
    Text before the first separator is discarded, and a separator followed by
    no text at all produces no row.
    """
    # Longest first, so a label that is a prefix of another cannot win.
    # Empty strings are ignored; they can never delimit anything.
    ordered = sorted((s for s in separators if s), key=len, reverse=True)
    if not ordered:
        return []
    pattern = re.compile('(' + '|'.join(re.escape(s) for s in ordered) + ')')
    # The capturing group keeps the separators in the result:
    # [preamble, sep, text, sep, text, ...] - always an odd number of pieces.
    pieces = pattern.split(aString)
    rows = []
    for k in range(1, len(pieces) - 1, 2):
        speaker = pieces[k]
        text = pieces[k + 1]
        if not text:
            continue
        # strip punctuation so that it is not counted as part of a word
        stripped = re.sub(r'[^\w\s]', '', text)
        rows.append([speaker, text, str(len(stripped.split()))])
    return rows
#get information on the records, speakers and word counts
def writeAnalytics(congressionalRecords, senators):
    """Build the five export tables from the cleaned records.

    congressionalRecords -- records of the form
                            [url, date, rawText, cleanText, dump].
    senators             -- speaker labels used to split the clean text.

    Each record is extended in place with its list of speeches and with a flat
    ``[speaker, count, speaker, count, ...]`` list of that day's word counts.

    Returns ``(textExport, textDump, speechExport, speakerExport,
    speakerAnalytics)``; every table is a list of rows whose first row is the
    header. With no records, the date columns of speakerAnalytics are ''.
    Raises ValueError if a speech is attributed to a label not in ``senators``.
    """
    textExport = [['url', 'date', 'extractRaw', 'extractClean']]
    textDump = [['date', 'extractDump']]
    speechExport = [['date', 'speaker', 'text', 'wordCount']]
    speakerExport = [['date', 'speaker', 'wordCount']]
    speakerAnalytics = [['startDate', 'endDate', 'speaker', 'wordCount']]

    # running totals over the whole date range
    perSpeakerTotal = dict.fromkeys(senators, 0)
    recordDates = []

    for record in congressionalRecords:
        url, day, rawText, cleanText, dump = (
            record[0], record[1], record[2], record[3], record[4])

        # split the clean extract into one row per speech
        speeches = multiDelimStringSplitter(cleanText, senators)
        record.append(speeches)

        # export only the segment dumps that actually removed something
        for erased in dump:
            if len(erased) > 0:
                textDump.append([day, erased])

        # word counts for this day only
        wordsPerSpeaker = dict.fromkeys(senators, 0)
        for speech in speeches:
            speaker, text, wordCount = speech[0], speech[1], speech[2]
            speechExport.append([day, speaker, text, wordCount])
            if speaker not in wordsPerSpeaker:
                raise ValueError('%r is not in the list of speakers' % (speaker,))
            wordsPerSpeaker[speaker] += int(wordCount)

        # keep the flat per-day list on the record, as before
        flatCounts = []
        for name in senators:
            flatCounts += [name, wordsPerSpeaker[name]]
        record.append(flatCounts)

        textExport.append([url, day, rawText, cleanText])
        recordDates.append(day)

        for name in senators:
            speakerExport.append([day, name, wordsPerSpeaker[name]])
            perSpeakerTotal[name] += wordsPerSpeaker[name]

    # min()/max() fail on an empty list, e.g. when nothing was downloaded
    firstDate = min(recordDates) if recordDates else ''
    lastDate = max(recordDates) if recordDates else ''
    for name in senators:
        speakerAnalytics.append([firstDate, lastDate, name, perSpeakerTotal[name]])

    return textExport, textDump, speechExport, speakerExport, speakerAnalytics
#export to CSV
def exportCSV(flatList, filename):
    """Write every row of flatList to the file named filename + ".csv".

    Args:
        flatList: iterable of rows; each row is a sequence of cell values.
        filename: output path without the ".csv" extension.
    """
    csvPath = filename + ".csv"
    #binary mode is what the Python 2 csv module expects
    with open(csvPath, "wb") as csvFile:
        rowWriter = csv.writer(csvFile)
        for row in flatList:
            rowWriter.writerow(row)
# Configuration handed to cleanRecords() further down in this script.
# NOTE(review): cleanRecords is not shown here; the descriptions below are
# inferred from the variable names and the script's stated goal -- confirm.

# Phrases presumably stripped from the extracted text before analysis.
phraseDelete = [
',',
'This ‘‘bullet’’ symbol identifies statements or insertions which are not spoken by a Member of the Senate on the floor.'
]
# Section headings that presumably mark the start of a segment that was not
# spoken on the floor. The surrounding spaces are part of each marker.
segmentStart = [
' PRAYER ',
' PLEDGE OF ALLEGIANCE ',
' APPOINTMENT OF ACTING PRESIDENT PRO TEMPORE ',
' CERTIFICATES OF ELECTION ',
' MESSAGE FROM THE HOUSE ',
' MESSAGES FROM THE PRESIDENT ',
' EXECUTIVE MESSAGES REFERRED ',
' LIST OF SENATORS BY STATES ',
' PRESIDENTIAL MESSAGES ',
' ENROLLED BILLS PRESENTED ',
' MEASURES REFERRED ',
' EXECUTIVE AND OTHER COMMUNICATIONS ',
' REPORTS OF COMMITTEES ',
' EXECUTIVE REPORTS OF COMMITTEES ',
' AMENDMENTS SUBMITTED AND PROPOSED ',
' INTRODUCTION OF BILLS AND JOINT RESOLUTIONS ',
' ADDITIONAL COSPONSORS ',
' SUBMITTED RESOLUTIONS ',
' SUBMISSION OF CONCURRENT AND SENATE RESOLUTIONS ',
' SENATE RESOLUTION ',
' TEXT OF AMENDMENTS ',
' APPOINTMENT ',
' AUTHORITY FOR COMMITTEES TO MEET ',
' CONFIRMATION ',
' NOMINATION '
]
# Marker presumably closing a segment opened by one of the headings above.
# NOTE(review): ' f ' looks like a glyph left behind by the PDF text
# extraction -- confirm against real extracts.
segmentEnd = ' f '
# Speaker labels for the 115th Congress (2017-2018), followed by the chair
# titles and a marker phrase. The list is passed to writeAnalytics() below.
# Every entry must be followed by a comma: two adjacent string literals are
# silently joined into a single element.
senators = [
    'Mr. SESSIONS', 'Mr. STRANGE', 'Mr. JONES', 'Mr. SHELBY', 'Mr. SULLIVAN', 'Ms. MURKOWSKI',
    'Mr. FLAKE', 'Mr. MCCAIN', 'Mr. KYL', 'Mr. COTTON', 'Mr. BOOZMAN', 'Mrs. FEINSTEIN', 'Ms. HARRIS',
    'Mr. GARDNER', 'Mr. BENNET', 'Mr. MURPHY', 'Mr. BLUMENTHAL', 'Mr. CARPER', 'Mr. COONS', 'Mr. NELSON', 'Mr. RUBIO',
    'Mr. PERDUE', 'Mr. ISAKSON', 'Ms. HIRONO', 'Mr. SCHATZ', 'Mr. RISCH', 'Mr. CRAPO', 'Mr. DURBIN',
    'Ms. DUCKWORTH', 'Mr. DONNELLY', 'Mr. YOUNG', 'Mrs. ERNST', 'Mr. GRASSLEY', 'Mr. ROBERTS', 'Mr. MORAN',
    'Mr. MCCONNELL', 'Mr. PAUL', 'Mr. CASSIDY', 'Mr. KENNEDY', 'Mr. KING', 'Ms. COLLINS', 'Mr. CARDIN',
    'Mr. VAN HOLLEN', 'Ms. WARREN', 'Mr. MARKEY', 'Ms. STABENOW', 'Mr. PETERS', 'Ms. KLOBUCHAR',
    'Mr. FRANKEN', 'Ms. SMITH', 'Mr. WICKER', 'Mr. COCHRAN', 'Mrs. HYDE-SMITH', 'Mrs. MCCASKILL',
    'Mr. BLUNT', 'Mr. TESTER', 'Mr. DAINES', 'Mrs. FISCHER', 'Mr. SASSE', 'Mr. HELLER', 'Ms. CORTEZ MASTO',
    'Mrs. SHAHEEN', 'Ms. HASSAN', 'Mr. MENENDEZ', 'Mr. BOOKER', 'Mr. HEINRICH', 'Mr. UDALL',
    'Mrs. GILLIBRAND', 'Mr. SCHUMER', 'Mr. TILLIS', 'Mr. BURR', 'Ms. HEITKAMP', 'Mr. HOEVEN',
    'Mr. BROWN', 'Mr. PORTMAN', 'Mr. INHOFE', 'Mr. LANKFORD', 'Mr. MERKLEY', 'Mr. WYDEN',
    'Mr. CASEY', 'Mr. TOOMEY', 'Mr. WHITEHOUSE', 'Mr. REED', 'Mr. GRAHAM',
    'Mr. SCOTT', 'Mr. ROUNDS', 'Mr. THUNE', 'Mr. CORKER', 'Mr. ALEXANDER', 'Mr. CRUZ',
    'Mr. CORNYN', 'Mr. HATCH', 'Mr. LEE', 'Mr. SANDERS', 'Mr. LEAHY', 'Mr. KAINE',
    'Mr. WARNER', 'Ms. CANTWELL', 'Mrs. MURRAY', 'Mr. MANCHIN', 'Mrs. CAPITO',
    'Ms. BALDWIN', 'Mr. JOHNSON', 'Mr. BARRASSO', 'Mr. ENZI',
    # chair titles
    'The ACTING PRESIDENT', 'The PRESIDING OFFICER', 'The VICE PRESIDENT',
    # marker phrase, kept as its own entry (the comma above was missing)
    'Executive nominations confirmed by',
]
# Date window to process, written as YYYYMMDD strings.
startDate = '20170106'
endDate = '20170130'

# Download the records for the window, then clean them.
conRecords = downloadConRecords(startDate, endDate)
# NOTE(review): cleanConRecords is never read below; writeAnalytics is given
# conRecords, so cleanRecords presumably updates the records in place -- confirm.
cleanConRecords = cleanRecords(conRecords, phraseDelete, segmentStart, segmentEnd)

(textExport, textDump, speechExport,
 speakerExport, speakerAnalytics) = writeAnalytics(conRecords, senators)

# Write each table to "<name>.csv", in the same order as before.
for exportName, exportRows in (
        ('textExport', textExport),
        ('textDump', textDump),
        ('speechExport', speechExport),
        ('speakerExport', speakerExport),
        ('speakerAnalytics', speakerAnalytics)):
    exportCSV(exportRows, exportName)
python beginner
python beginner
New contributor
New contributor
New contributor
asked 4 mins ago
SebastianSebastian
6
6
New contributor
New contributor
add a comment |
add a comment |
0
active
oldest
votes
Your Answer
StackExchange.ifUsing("editor", function () {
return StackExchange.using("mathjaxEditing", function () {
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
});
});
}, "mathjax-editing");
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");
// Runs when StackExchange reports ready: initialises the tag renderer, then
// loads the external editor and calls createEditor(), going through the
// "snippets" module first when snippets are enabled.
StackExchange.ready(function() {
    var channelOptions = {
        tags: "".split(" "),
        id: "196"
    };
    initTagRenderer("".split(" "), "".split(" "), channelOptions);

    StackExchange.using("externalEditor", function() {
        // Have to fire editor after snippets, if snippets enabled
        if (StackExchange.settings.snippets.snippetsEnabled) {
            StackExchange.using("snippets", function() {
                createEditor();
            });
        }
        else {
            createEditor();
        }
    });

    // Passes the answer-editor options to StackExchange.prepareEditor.
    function createEditor() {
        StackExchange.prepareEditor({
            heartbeatType: 'answer',
            autoActivateHeartbeat: false,
            convertImagesToLinks: false,
            noModals: true,
            showLowRepImageUploadWarning: true,
            reputationToPostImages: null,
            bindNavPrevention: true,
            postfix: "",
            imageUploader: {
                // The HTML strings use \u003c / \u003e for < and >, and \" for
                // the inner quotes, so each stays one valid string literal.
                brandingHtml: "Powered by \u003ca class=\"icon-imgur-white\" href=\"https://imgur.com/\"\u003e\u003c/a\u003e",
                contentPolicyHtml: "User contributions licensed under \u003ca href=\"https://creativecommons.org/licenses/by-sa/3.0/\"\u003ecc by-sa 3.0 with attribution required\u003c/a\u003e \u003ca href=\"https://stackoverflow.com/legal/content-policy\"\u003e(content policy)\u003c/a\u003e",
                allowUrls: true
            },
            onDemand: true,
            discardSelector: ".discard-answer"
            ,immediatelyShowMarkdownHelp:true
        });
    }
});
Sebastian is a new contributor. Be nice, and check out our Code of Conduct.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f212599%2fpython-script-to-pull-text-data%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
0
active
oldest
votes
0
active
oldest
votes
active
oldest
votes
active
oldest
votes
Sebastian is a new contributor. Be nice, and check out our Code of Conduct.
Sebastian is a new contributor. Be nice, and check out our Code of Conduct.
Sebastian is a new contributor. Be nice, and check out our Code of Conduct.
Sebastian is a new contributor. Be nice, and check out our Code of Conduct.
Thanks for contributing an answer to Code Review Stack Exchange!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
Use MathJax to format equations. MathJax reference.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f212599%2fpython-script-to-pull-text-data%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown