How can I make this Python code (more) object oriented? Uploading songs from website to Database and then to...
$begingroup$
My Python program parses songs from a website, corrects song titles and artists with the Last.fm API, searches for the spotify uri using the Spotify API, stores all the information in a SQLite database and then uploads it into a Spotify playlist with the Spotify API.
I would like to make the program object oriented and need some advice on how to do that.
Some general python advice would also be useful.
Thanks in advance guys!
I have a separate config.py file with all the needed API variables.
scraper.py
# -*- coding: utf-8 -*-
# import config file
import config
# import libraries
from bs4 import BeautifulSoup
import datetime
import urllib.request as urllib
import sys
import time
import re
import sqlite3
# webdriver libraries
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# spotipy library
import spotipy
import spotipy.util as util
# import pylast
import pylast
# song class holds information about each song
class Song:
    """Hold the artist, title and Spotify URI of a single track."""

    # Class-level defaults kept for backward compatibility; every instance
    # overrides them in __init__.
    artist = None
    song = None
    spotify_uri = None

    def __init__(self, artist, song, spotify_uri):
        self.artist = artist
        self.song = song
        self.spotify_uri = spotify_uri

    def __repr__(self):
        return '{}(artist={!r}, song={!r}, spotify_uri={!r})'.format(
            type(self).__name__, self.artist, self.song, self.spotify_uri)

    def printSong(self):
        """Print the song as ``artist - title , Uri: uri``."""
        print(self.artist, '-', self.song, ', Uri:', self.spotify_uri)
##------------------------------------------------------------------------------
## Get Date of latest sunday
##
## @return formatted date of last sunday as yyyymmdd
#
def getSundayDate(today=None):
    """Return the most recent Sunday (today included) as ``yyyymmdd``.

    @param today, optional datetime.date to count back from; defaults to
           the current local date
    @return formatted date string of that Sunday
    """
    if today is None:
        today = datetime.date.today()
    # date.weekday() numbers Monday as 0 and Sunday as 6, so this is the
    # number of days elapsed since the latest Sunday (0 on a Sunday).
    days_since_sunday = (today.weekday() - 6) % 7
    sunday = today - datetime.timedelta(days=days_since_sunday)
    return sunday.strftime('%Y%m%d')
##------------------------------------------------------------------------------
## URL Pattern
##
## https://fm4.orf.at/player/20190120/SSU
## URL pattern:
## /yyyymmdd/SSU
## /20190120/SSU
## SSU is just Sunny Side Up the show from 10am till 1pm
## The URL pattern changes every day; we rebuild it each week
## so that only Sundays are fetched
##
## @return concatenated URL of website
def getURLPattern():
    """Return the FM4 player URL of the latest Sunday's SSU broadcast.

    The URL has the form ``https://fm4.orf.at/player/<yyyymmdd>/SSU``,
    where the date part comes from getSundayDate().
    """
    return 'https://fm4.orf.at/player/' + getSundayDate() + '/SSU'
##------------------------------------------------------------------------------
## Get html source from page specified by page_url
##
## @return html source as beautiful soup object
#
def getHtmlFromPage():
    """Load the playlist page in headless Firefox and return its parsed HTML.

    @return BeautifulSoup object built from the rendered page source
    Raises whatever WebDriverWait raises when the playlist element does not
    appear within 3 seconds; the browser is shut down in every case.
    """
    page_URL = getURLPattern()
    options = Options()
    options.headless = True
    profile = webdriver.FirefoxProfile()
    # NOTE(review): presumably mutes the web player - confirm the preference
    profile.set_preference("media.volume_scale", "0.0")
    driver = webdriver.Firefox(options=options, firefox_profile=profile)
    try:
        driver.get(page_URL)
        wait = WebDriverWait(driver, 3)
        wait.until(EC.presence_of_element_located((By.CLASS_NAME,
                                                   'broadcast-items-list')))
        # NOTE(review): extra pause presumably lets the list finish
        # rendering after its container appears - confirm it is needed
        time.sleep(1)
        return BeautifulSoup(driver.page_source, "html.parser")
    finally:
        # Always release the browser, even when the wait above times out.
        driver.quit()
##------------------------------------------------------------------------------
## remove bad characters from list
##
## @param list, list with elements to check
#
def sanitize(strList):
    """Clean every entry of strList in place.

    Removes every character that is not an ASCII letter, digit, whitespace,
    German umlaut/sharp s or hyphen, then cuts off a trailing
    ``ft <featured artists>`` part.

    @param strList, list with elements to clean; entries are converted with
           str() and written back into the same list
    """
    # Explicit A-Za-z: the range A-z would also keep [ \ ] ^ _ and backtick.
    bad_chars = re.compile(r'[^A-Za-z\s\däöüÄÖÜß-]')
    # \b keeps "ft" inside words such as "Daft" or "after" untouched.
    featuring = re.compile(r'\s*\bft\b\.?[^\n]*')
    for i, raw in enumerate(strList):
        cleaned = bad_chars.sub("", str(raw))
        strList[i] = featuring.sub("", cleaned)
##------------------------------------------------------------------------------
## print music
##
## @param lists to print
#
def printMusic(interpreter_list, title_list):
    """Print one ``interpreter : title`` line per track.

    @param interpreter_list, list of artist names
    @param title_list, list of track titles, parallel to interpreter_list
    """
    for interpreter, title in zip(interpreter_list, title_list):
        print(interpreter + " : " + title)
##------------------------------------------------------------------------------
## parse html
##
## @param lists to write results to
#
def parseHtml(interpreter_list, title_list):
    """Append the interpreters and titles found on the page to the lists.

    @param interpreter_list, list that receives the text of every
           ``div.interpreter`` element
    @param title_list, list that receives the text of every ``div.title``
           element
    Raises Exception when the two element counts differ or when no
    interpreter element is found at all.
    """
    soup = getHtmlFromPage()
    interpreter = soup.find_all("div", {"class": "interpreter"})
    title = soup.find_all("div", {"class": "title"})
    if len(interpreter) != len(title):
        # The original message was missing the space between its two halves.
        raise Exception("The amount of interpreters don't correspond "
                        "to the amount of titles.")
    if not interpreter:
        raise Exception("No FM4 music playlist found in given url")
    interpreter_list.extend(tag.text for tag in interpreter)
    title_list.extend(tag.text for tag in title)
##------------------------------------------------------------------------------
## create Token with given credentials
##
## @return authentication token
#
def getToken():
    """Request a Spotify user token with the credentials from config.

    @return authentication token
    Raises Exception when no token is returned.
    """
    token = util.prompt_for_user_token(config.USERNAME, config.SCOPE,
                                       config.CLIENT_ID, config.CLIENT_SECRET,
                                       config.REDIRECT_URI)
    if not token:
        raise Exception("Could not get authentication token from spotify!")
    return token
##------------------------------------------------------------------------------
## search track and get spotify uri
##
## @param spotify_Obj, spotipy client used to run the search
## @param interpreter && title, strings containing track info
## @return uri string
#
def getUri(spotify_Obj, interpreter, title):
    """Search Spotify for a track and return the URI of the first hit.

    @param spotify_Obj, object whose search(q=...) returns the result dict
    @param interpreter, artist name used in the query
    @param title, track title used in the query
    @return URI string of the first track found, or None when the search
            returned nothing or no track matched
    """
    result = spotify_Obj.search(q=interpreter + ' ' + title)
    if not result:
        return None
    items = result['tracks']['items']
    if not items:
        return None
    return str(items[0]['uri'])
##------------------------------------------------------------------------------
## correct artist name and track title with lastFm api
##
## @param1 artist_name, name of artist to correct
## @param2 title_name, title name to correct
## @return track_corrected, corrected Track object
#
def getTrackInfo(artist_name, track_name):
    """Correct an artist name and track title with the Last.fm API.

    @param artist_name, name of artist to correct
    @param track_name, title of track to correct
    @return pylast.Track built from the corrected artist and title
    """
    last_Fm = getLastFmNetworkAuth()
    artist = last_Fm.get_artist(artist_name)
    # NOTE(review): get_correction() presumably yields None when Last.fm has
    # no correction - confirm; in that case keep the name we were given.
    artist_corrected_name = artist.get_correction() or artist_name
    track = last_Fm.get_track(artist_corrected_name, track_name)
    track_corrected_name = track.get_correction() or track_name
    return pylast.Track(artist_corrected_name, track_corrected_name, last_Fm)
##------------------------------------------------------------------------------
## get last fm network authentication
##
## @return network authentication token
#
def getLastFmNetworkAuth():
    """Create a Last.fm network object from the API key/secret in config.

    @return pylast.LastFMNetwork instance
    """
    return pylast.LastFMNetwork(config.LASTFM_API_KEY,
                                config.LASTFM_API_SECRET)
##------------------------------------------------------------------------------
## parse music items from website, put them into a list, sanitize lists,
## correct artist names and song titles with last.fm API and save list in a
## sqlite database for further usage
##
## @param song_list, list that receives a Song for every track found on Spotify
#
def parseTracksIntoSongClassList(song_list):
    """Scrape the playlist, correct the names and collect Spotify matches.

    Parses interpreters and titles from the website, sanitizes them,
    corrects them through getTrackInfo() and looks each track up on
    Spotify. Tracks without a Spotify URI are skipped.

    @param song_list, list that a Song is appended to for every track that
           was found on Spotify
    """
    interpreter_list = []
    title_list = []
    parseHtml(interpreter_list, title_list)
    print(datetime.datetime.now(), "Done parsing html")
    sanitize(interpreter_list)
    sanitize(title_list)
    sp = spotipy.Spotify(getToken())
    for artist_name, title_name in zip(interpreter_list, title_list):
        track_info = getTrackInfo(artist_name, title_name)
        title = str(track_info.get_name())
        artist = str(track_info.get_artist())
        # The corrected names are only used when they differ from each
        # other; str() never returns None, so no further None check is
        # needed. NOTE(review): a missing correction may surface here as the
        # literal string 'None' - confirm what pylast returns.
        if title != artist:
            title_name, artist_name = title, artist
        spotify_uri = getUri(sp, artist_name, title_name)
        if spotify_uri:
            song_list.append(Song(artist_name, title_name, str(spotify_uri)))
    print(datetime.datetime.now(), "Done parsing songs")
##------------------------------------------------------------------------------
## insert new songs to database, checks for duplicates and ignores them
##
## @param song_list, list containing songs which need to be inserted
## into database
#
def updateDatabase(song_list, database_name='SongDatabase.db'):
    """Insert the songs into the SQLite table ``songs``, ignoring duplicates.

    The table is created when it does not exist. Its UNIQUE constraint with
    ON CONFLICT IGNORE silently drops rows whose artist, title and URI are
    already stored. New rows get today's date and Uploaded = 0.

    @param song_list, list containing songs (objects with artist, song and
           spotify_uri attributes) which need to be inserted
    @param database_name, path of the SQLite file; defaults to the file the
           script has always used
    """
    # Store the date as text; the original computed this string but threw
    # it away and passed the date object instead.
    upload_date = datetime.date.today().strftime('%Y-%m-%d')
    rows = [(song.artist, song.song, song.spotify_uri, upload_date, 0)
            for song in song_list]
    conn = sqlite3.connect(database_name)
    try:
        # "with conn" commits on success and rolls back on error.
        with conn:
            conn.execute('''CREATE TABLE IF NOT EXISTS songs
                (SongID INTEGER PRIMARY KEY, artist_name TEXT, song_name TEXT,
                spotify_uri TEXT, UploadDate TIMESTAMP, Uploaded INTEGER,
                UNIQUE(artist_name, song_name, spotify_uri) ON CONFLICT IGNORE)''')
            conn.executemany('''INSERT INTO songs
                (artist_name, song_name, spotify_uri, UploadDate, Uploaded)
                VALUES (?,?,?,?,?)''', rows)
    finally:
        conn.close()
    print(datetime.datetime.now(), "Done updating Database")
##------------------------------------------------------------------------------
## copy Uris from song_list into new list
##
## @param song_list, list containing songs which get copied into new list
## @return track_list, list containing all song uris
#
def getUrisList(song_list):
    """Collect the Spotify URIs of all songs into a new list.

    @param song_list, list containing songs whose spotify_uri is copied
    @return uri_list, list containing all song URIs (also printed)
    """
    uri_list = [song.spotify_uri for song in song_list]
    print(uri_list)
    return uri_list
##------------------------------------------------------------------------------
## Main part of the program
## get html and parse important parts into file
#
if __name__ == '__main__':
    # list to fill with corrected songs
    song_list = []
    # parse songs into song_list
    parseTracksIntoSongClassList(song_list)
    # insert song_list into database
    updateDatabase(song_list)
dataManager.py
# -*- coding: utf-8 -*-
# import config file
import config
import sqlite3
import pandas as pd
# spotipy library
import spotipy
import spotipy.util as util
##------------------------------------------------------------------------------
## create Token with given credentials
##
## @return authentication token
#
def getToken():
    """Request a Spotify user token with the credentials from config.

    @return authentication token
    Raises Exception when no token is returned, matching the getToken()
    of scraper.py.
    """
    token = util.prompt_for_user_token(config.USERNAME, config.SCOPE,
                                       config.CLIENT_ID, config.CLIENT_SECRET,
                                       config.REDIRECT_URI)
    if not token:
        raise Exception("Could not get authentication token from spotify!")
    return token
##------------------------------------------------------------------------------
## upload every song not yet marked as uploaded to the Spotify playlist,
## then mark those songs as uploaded in the database
##
## takes no parameters; the playlist and user come from config
#
def uploadSongsToSpotify():
    """Upload all songs with Uploaded = 0 to the configured playlist.

    Reads the pending URIs from SongDatabase.db, adds them to the playlist
    config.PLAYLIST_ID of user config.USERNAME and then sets Uploaded = 1
    for them. Raises Exception when no token could be obtained or when
    there is nothing new to upload.
    """
    database_name = 'SongDatabase.db'
    # Check the token itself: a spotipy.Spotify object is always truthy, so
    # testing the client never detected a missing token.
    token = getToken()
    if not token:
        raise Exception("Could not get token from spotify API")
    sp = spotipy.Spotify(token)
    username = config.USERNAME
    playlist_id = config.PLAYLIST_ID
    conn = sqlite3.connect(database_name)
    try:
        c = conn.cursor()
        c.execute("""SELECT spotify_uri FROM songs WHERE (Uploaded = 0)""")
        uri_list = [str(row[0]) for row in c.fetchall()]
        print(uri_list)
        if not uri_list:
            raise Exception("There aren't any new songs in database, songs were already uploaded")
        # The Spotify Web API accepts at most 100 tracks per add request.
        batch_size = 100
        for start in range(0, len(uri_list), batch_size):
            sp.user_playlist_add_tracks(username, playlist_id,
                                        uri_list[start:start + batch_size])
        # Mark rows only after every batch went through.
        c.execute("""UPDATE songs SET Uploaded = ? WHERE Uploaded = ?""", (1, 0))
        conn.commit()
    finally:
        # Closes the connection on the error paths as well.
        conn.close()
if __name__ == '__main__':
    uploadSongsToSpotify()
object-oriented python-3.x
New contributor
$endgroup$
add a comment |
$begingroup$
My Python program parses songs from a website, corrects song titles and artists with the Last.fm API, searches for the spotify uri using the Spotify API, stores all the information in a SQLite database and then uploads it into a Spotify playlist with the Spotify API.
I would like to make the program object oriented and need some advice on how to do that.
Some general python advice would also be useful.
Thanks in advance guys!
I have a separate config.py file with all the needed API variables.
scraper.py
# -*- coding: utf-8 -*-
# import config file
import config
# import libraries
from bs4 import BeautifulSoup
import datetime
import urllib.request as urllib
import sys
import time
import re
import sqlite3
# webdriver libraries
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# spotipy library
import spotipy
import spotipy.util as util
# import pylast
import pylast
# song class holds information about each song
class Song:
    """Hold the artist, title and Spotify URI of a single track."""

    # Class-level defaults kept for backward compatibility; every instance
    # overrides them in __init__.
    artist = None
    song = None
    spotify_uri = None

    def __init__(self, artist, song, spotify_uri):
        self.artist = artist
        self.song = song
        self.spotify_uri = spotify_uri

    def __repr__(self):
        return '{}(artist={!r}, song={!r}, spotify_uri={!r})'.format(
            type(self).__name__, self.artist, self.song, self.spotify_uri)

    def printSong(self):
        """Print the song as ``artist - title , Uri: uri``."""
        print(self.artist, '-', self.song, ', Uri:', self.spotify_uri)
##------------------------------------------------------------------------------
## Get Date of latest sunday
##
## @return formatted date of last sunday as yyyymmdd
#
def getSundayDate(today=None):
    """Return the most recent Sunday (today included) as ``yyyymmdd``.

    @param today, optional datetime.date to count back from; defaults to
           the current local date
    @return formatted date string of that Sunday
    """
    if today is None:
        today = datetime.date.today()
    # date.weekday() numbers Monday as 0 and Sunday as 6, so this is the
    # number of days elapsed since the latest Sunday (0 on a Sunday).
    days_since_sunday = (today.weekday() - 6) % 7
    sunday = today - datetime.timedelta(days=days_since_sunday)
    return sunday.strftime('%Y%m%d')
##------------------------------------------------------------------------------
## URL Pattern
##
## https://fm4.orf.at/player/20190120/SSU
## URL pattern:
## /yyyymmdd/SSU
## /20190120/SSU
## SSU is just Sunny Side Up the show from 10am till 1pm
## The URL pattern changes every day; we rebuild it each week
## so that only Sundays are fetched
##
## @return concatenated URL of website
def getURLPattern():
    """Return the FM4 player URL of the latest Sunday's SSU broadcast.

    The URL has the form ``https://fm4.orf.at/player/<yyyymmdd>/SSU``,
    where the date part comes from getSundayDate().
    """
    return 'https://fm4.orf.at/player/' + getSundayDate() + '/SSU'
##------------------------------------------------------------------------------
## Get html source from page specified by page_url
##
## @return html source as beautiful soup object
#
def getHtmlFromPage():
    """Load the playlist page in headless Firefox and return its parsed HTML.

    @return BeautifulSoup object built from the rendered page source
    Raises whatever WebDriverWait raises when the playlist element does not
    appear within 3 seconds; the browser is shut down in every case.
    """
    page_URL = getURLPattern()
    options = Options()
    options.headless = True
    profile = webdriver.FirefoxProfile()
    # NOTE(review): presumably mutes the web player - confirm the preference
    profile.set_preference("media.volume_scale", "0.0")
    driver = webdriver.Firefox(options=options, firefox_profile=profile)
    try:
        driver.get(page_URL)
        wait = WebDriverWait(driver, 3)
        wait.until(EC.presence_of_element_located((By.CLASS_NAME,
                                                   'broadcast-items-list')))
        # NOTE(review): extra pause presumably lets the list finish
        # rendering after its container appears - confirm it is needed
        time.sleep(1)
        return BeautifulSoup(driver.page_source, "html.parser")
    finally:
        # Always release the browser, even when the wait above times out.
        driver.quit()
##------------------------------------------------------------------------------
## remove bad characters from list
##
## @param list, list with elements to check
#
def sanitize(strList):
    """Clean every entry of strList in place.

    Removes every character that is not an ASCII letter, digit, whitespace,
    German umlaut/sharp s or hyphen, then cuts off a trailing
    ``ft <featured artists>`` part.

    @param strList, list with elements to clean; entries are converted with
           str() and written back into the same list
    """
    # Explicit A-Za-z: the range A-z would also keep [ \ ] ^ _ and backtick.
    bad_chars = re.compile(r'[^A-Za-z\s\däöüÄÖÜß-]')
    # \b keeps "ft" inside words such as "Daft" or "after" untouched.
    featuring = re.compile(r'\s*\bft\b\.?[^\n]*')
    for i, raw in enumerate(strList):
        cleaned = bad_chars.sub("", str(raw))
        strList[i] = featuring.sub("", cleaned)
##------------------------------------------------------------------------------
## print music
##
## @param lists to print
#
def printMusic(interpreter_list, title_list):
    """Print one ``interpreter : title`` line per track.

    @param interpreter_list, list of artist names
    @param title_list, list of track titles, parallel to interpreter_list
    """
    for interpreter, title in zip(interpreter_list, title_list):
        print(interpreter + " : " + title)
##------------------------------------------------------------------------------
## parse html
##
## @param lists to write results to
#
def parseHtml(interpreter_list, title_list):
    """Append the interpreters and titles found on the page to the lists.

    @param interpreter_list, list that receives the text of every
           ``div.interpreter`` element
    @param title_list, list that receives the text of every ``div.title``
           element
    Raises Exception when the two element counts differ or when no
    interpreter element is found at all.
    """
    soup = getHtmlFromPage()
    interpreter = soup.find_all("div", {"class": "interpreter"})
    title = soup.find_all("div", {"class": "title"})
    if len(interpreter) != len(title):
        # The original message was missing the space between its two halves.
        raise Exception("The amount of interpreters don't correspond "
                        "to the amount of titles.")
    if not interpreter:
        raise Exception("No FM4 music playlist found in given url")
    interpreter_list.extend(tag.text for tag in interpreter)
    title_list.extend(tag.text for tag in title)
##------------------------------------------------------------------------------
## create Token with given credentials
##
## @return authentication token
#
def getToken():
    """Request a Spotify user token with the credentials from config.

    @return authentication token
    Raises Exception when no token is returned.
    """
    token = util.prompt_for_user_token(config.USERNAME, config.SCOPE,
                                       config.CLIENT_ID, config.CLIENT_SECRET,
                                       config.REDIRECT_URI)
    if not token:
        raise Exception("Could not get authentication token from spotify!")
    return token
##------------------------------------------------------------------------------
## search track and get spotify uri
##
## @param spotify_Obj, spotipy client used to run the search
## @param interpreter && title, strings containing track info
## @return uri string
#
def getUri(spotify_Obj, interpreter, title):
    """Search Spotify for a track and return the URI of the first hit.

    @param spotify_Obj, object whose search(q=...) returns the result dict
    @param interpreter, artist name used in the query
    @param title, track title used in the query
    @return URI string of the first track found, or None when the search
            returned nothing or no track matched
    """
    result = spotify_Obj.search(q=interpreter + ' ' + title)
    if not result:
        return None
    items = result['tracks']['items']
    if not items:
        return None
    return str(items[0]['uri'])
##------------------------------------------------------------------------------
## correct artist name and track title with lastFm api
##
## @param1 artist_name, name of artist to correct
## @param2 title_name, title name to correct
## @return track_corrected, corrected Track object
#
def getTrackInfo(artist_name, track_name):
    """Correct an artist name and track title with the Last.fm API.

    @param artist_name, name of artist to correct
    @param track_name, title of track to correct
    @return pylast.Track built from the corrected artist and title
    """
    last_Fm = getLastFmNetworkAuth()
    artist = last_Fm.get_artist(artist_name)
    # NOTE(review): get_correction() presumably yields None when Last.fm has
    # no correction - confirm; in that case keep the name we were given.
    artist_corrected_name = artist.get_correction() or artist_name
    track = last_Fm.get_track(artist_corrected_name, track_name)
    track_corrected_name = track.get_correction() or track_name
    return pylast.Track(artist_corrected_name, track_corrected_name, last_Fm)
##------------------------------------------------------------------------------
## get last fm network authentication
##
## @return network authentication token
#
def getLastFmNetworkAuth():
    """Create a Last.fm network object from the API key/secret in config.

    @return pylast.LastFMNetwork instance
    """
    return pylast.LastFMNetwork(config.LASTFM_API_KEY,
                                config.LASTFM_API_SECRET)
##------------------------------------------------------------------------------
## parse music items from website, put them into a list, sanitize lists,
## correct artist names and song titles with last.fm API and save list in a
## sqlite database for further usage
##
## @param song_list, list that receives a Song for every track found on Spotify
#
def parseTracksIntoSongClassList(song_list):
    """Scrape the playlist, correct the names and collect Spotify matches.

    Parses interpreters and titles from the website, sanitizes them,
    corrects them through getTrackInfo() and looks each track up on
    Spotify. Tracks without a Spotify URI are skipped.

    @param song_list, list that a Song is appended to for every track that
           was found on Spotify
    """
    interpreter_list = []
    title_list = []
    parseHtml(interpreter_list, title_list)
    print(datetime.datetime.now(), "Done parsing html")
    sanitize(interpreter_list)
    sanitize(title_list)
    sp = spotipy.Spotify(getToken())
    for artist_name, title_name in zip(interpreter_list, title_list):
        track_info = getTrackInfo(artist_name, title_name)
        title = str(track_info.get_name())
        artist = str(track_info.get_artist())
        # The corrected names are only used when they differ from each
        # other; str() never returns None, so no further None check is
        # needed. NOTE(review): a missing correction may surface here as the
        # literal string 'None' - confirm what pylast returns.
        if title != artist:
            title_name, artist_name = title, artist
        spotify_uri = getUri(sp, artist_name, title_name)
        if spotify_uri:
            song_list.append(Song(artist_name, title_name, str(spotify_uri)))
    print(datetime.datetime.now(), "Done parsing songs")
##------------------------------------------------------------------------------
## insert new songs to database, checks for duplicates and ignores them
##
## @param song_list, list containing songs which need to be inserted
## into database
#
def updateDatabase(song_list, database_name='SongDatabase.db'):
    """Insert the songs into the SQLite table ``songs``, ignoring duplicates.

    The table is created when it does not exist. Its UNIQUE constraint with
    ON CONFLICT IGNORE silently drops rows whose artist, title and URI are
    already stored. New rows get today's date and Uploaded = 0.

    @param song_list, list containing songs (objects with artist, song and
           spotify_uri attributes) which need to be inserted
    @param database_name, path of the SQLite file; defaults to the file the
           script has always used
    """
    # Store the date as text; the original computed this string but threw
    # it away and passed the date object instead.
    upload_date = datetime.date.today().strftime('%Y-%m-%d')
    rows = [(song.artist, song.song, song.spotify_uri, upload_date, 0)
            for song in song_list]
    conn = sqlite3.connect(database_name)
    try:
        # "with conn" commits on success and rolls back on error.
        with conn:
            conn.execute('''CREATE TABLE IF NOT EXISTS songs
                (SongID INTEGER PRIMARY KEY, artist_name TEXT, song_name TEXT,
                spotify_uri TEXT, UploadDate TIMESTAMP, Uploaded INTEGER,
                UNIQUE(artist_name, song_name, spotify_uri) ON CONFLICT IGNORE)''')
            conn.executemany('''INSERT INTO songs
                (artist_name, song_name, spotify_uri, UploadDate, Uploaded)
                VALUES (?,?,?,?,?)''', rows)
    finally:
        conn.close()
    print(datetime.datetime.now(), "Done updating Database")
##------------------------------------------------------------------------------
## copy Uris from song_list into new list
##
## @param song_list, list containing songs which get copied into new list
## @return track_list, list containing all song uris
#
def getUrisList(song_list):
    """Collect the Spotify URIs of all songs into a new list.

    @param song_list, list containing songs whose spotify_uri is copied
    @return uri_list, list containing all song URIs (also printed)
    """
    uri_list = [song.spotify_uri for song in song_list]
    print(uri_list)
    return uri_list
##------------------------------------------------------------------------------
## Main part of the program
## get html and parse important parts into file
#
if __name__ == '__main__':
    # list to fill with corrected songs
    song_list = []
    # parse songs into song_list
    parseTracksIntoSongClassList(song_list)
    # insert song_list into database
    updateDatabase(song_list)
dataManager.py
# -*- coding: utf-8 -*-
# import config file
import config
import sqlite3
import pandas as pd
# spotipy library
import spotipy
import spotipy.util as util
##------------------------------------------------------------------------------
## create Token with given credentials
##
## @return authentication token
#
def getToken():
    """Request a Spotify user token with the credentials from config.

    @return authentication token
    Raises Exception when no token is returned, matching the getToken()
    of scraper.py.
    """
    token = util.prompt_for_user_token(config.USERNAME, config.SCOPE,
                                       config.CLIENT_ID, config.CLIENT_SECRET,
                                       config.REDIRECT_URI)
    if not token:
        raise Exception("Could not get authentication token from spotify!")
    return token
##------------------------------------------------------------------------------
## upload every song not yet marked as uploaded to the Spotify playlist,
## then mark those songs as uploaded in the database
##
## takes no parameters; the playlist and user come from config
#
def uploadSongsToSpotify():
    """Upload all songs with Uploaded = 0 to the configured playlist.

    Reads the pending URIs from SongDatabase.db, adds them to the playlist
    config.PLAYLIST_ID of user config.USERNAME and then sets Uploaded = 1
    for them. Raises Exception when no token could be obtained or when
    there is nothing new to upload.
    """
    database_name = 'SongDatabase.db'
    # Check the token itself: a spotipy.Spotify object is always truthy, so
    # testing the client never detected a missing token.
    token = getToken()
    if not token:
        raise Exception("Could not get token from spotify API")
    sp = spotipy.Spotify(token)
    username = config.USERNAME
    playlist_id = config.PLAYLIST_ID
    conn = sqlite3.connect(database_name)
    try:
        c = conn.cursor()
        c.execute("""SELECT spotify_uri FROM songs WHERE (Uploaded = 0)""")
        uri_list = [str(row[0]) for row in c.fetchall()]
        print(uri_list)
        if not uri_list:
            raise Exception("There aren't any new songs in database, songs were already uploaded")
        # The Spotify Web API accepts at most 100 tracks per add request.
        batch_size = 100
        for start in range(0, len(uri_list), batch_size):
            sp.user_playlist_add_tracks(username, playlist_id,
                                        uri_list[start:start + batch_size])
        # Mark rows only after every batch went through.
        c.execute("""UPDATE songs SET Uploaded = ? WHERE Uploaded = ?""", (1, 0))
        conn.commit()
    finally:
        # Closes the connection on the error paths as well.
        conn.close()
if __name__ == '__main__':
    uploadSongsToSpotify()
object-oriented python-3.x
New contributor
$endgroup$
add a comment |
$begingroup$
My Python program parses songs from a website, corrects song titles and artists with the Last.fm API, searches for the spotify uri using the Spotify API, stores all the information in a SQLite database and then uploads it into a Spotify playlist with the Spotify API.
I would like to make the program object oriented and need some advice on how to do that.
Some general python advice would also be useful.
Thanks in advance guys!
I have a separate config.py file with all the needed API variables.
scraper.py
# -*- coding: utf-8 -*-
# import config file
import config
# import libraries
from bs4 import BeautifulSoup
import datetime
import urllib.request as urllib
import sys
import time
import re
import sqlite3
# webdriver libraries
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# spotipy library
import spotipy
import spotipy.util as util
# import pylast
import pylast
# song class holds information about each song
class Song:
    """Hold the artist, title and Spotify URI of a single track."""

    # Class-level defaults kept for backward compatibility; every instance
    # overrides them in __init__.
    artist = None
    song = None
    spotify_uri = None

    def __init__(self, artist, song, spotify_uri):
        self.artist = artist
        self.song = song
        self.spotify_uri = spotify_uri

    def __repr__(self):
        return '{}(artist={!r}, song={!r}, spotify_uri={!r})'.format(
            type(self).__name__, self.artist, self.song, self.spotify_uri)

    def printSong(self):
        """Print the song as ``artist - title , Uri: uri``."""
        print(self.artist, '-', self.song, ', Uri:', self.spotify_uri)
##------------------------------------------------------------------------------
## Get Date of latest sunday
##
## @return formatted date of last sunday as yyyymmdd
#
def getSundayDate(today=None):
    """Return the most recent Sunday (today included) as ``yyyymmdd``.

    @param today, optional datetime.date to count back from; defaults to
           the current local date
    @return formatted date string of that Sunday
    """
    if today is None:
        today = datetime.date.today()
    # date.weekday() numbers Monday as 0 and Sunday as 6, so this is the
    # number of days elapsed since the latest Sunday (0 on a Sunday).
    days_since_sunday = (today.weekday() - 6) % 7
    sunday = today - datetime.timedelta(days=days_since_sunday)
    return sunday.strftime('%Y%m%d')
##------------------------------------------------------------------------------
## URL Pattern
##
## https://fm4.orf.at/player/20190120/SSU
## URL pattern:
## /yyyymmdd/SSU
## /20190120/SSU
## SSU is just Sunny Side Up the show from 10am till 1pm
## The URL pattern changes every day; we rebuild it each week
## so that only Sundays are fetched
##
## @return concatenated URL of website
def getURLPattern():
    """Return the FM4 player URL of the latest Sunday's SSU broadcast.

    The URL has the form ``https://fm4.orf.at/player/<yyyymmdd>/SSU``,
    where the date part comes from getSundayDate().
    """
    return 'https://fm4.orf.at/player/' + getSundayDate() + '/SSU'
##------------------------------------------------------------------------------
## Get html source from page specified by page_url
##
## @return html source as beautiful soup object
#
def getHtmlFromPage():
    """Load the playlist page in headless Firefox and return its parsed HTML.

    @return BeautifulSoup object built from the rendered page source
    Raises whatever WebDriverWait raises when the playlist element does not
    appear within 3 seconds; the browser is shut down in every case.
    """
    page_URL = getURLPattern()
    options = Options()
    options.headless = True
    profile = webdriver.FirefoxProfile()
    # NOTE(review): presumably mutes the web player - confirm the preference
    profile.set_preference("media.volume_scale", "0.0")
    driver = webdriver.Firefox(options=options, firefox_profile=profile)
    try:
        driver.get(page_URL)
        wait = WebDriverWait(driver, 3)
        wait.until(EC.presence_of_element_located((By.CLASS_NAME,
                                                   'broadcast-items-list')))
        # NOTE(review): extra pause presumably lets the list finish
        # rendering after its container appears - confirm it is needed
        time.sleep(1)
        return BeautifulSoup(driver.page_source, "html.parser")
    finally:
        # Always release the browser, even when the wait above times out.
        driver.quit()
##------------------------------------------------------------------------------
## remove bad characters from list
##
## @param list, list with elements to check
#
def sanitize(strList):
    """Clean every entry of strList in place.

    Removes every character that is not an ASCII letter, digit, whitespace,
    German umlaut/sharp s or hyphen, then cuts off a trailing
    ``ft <featured artists>`` part.

    @param strList, list with elements to clean; entries are converted with
           str() and written back into the same list
    """
    # Explicit A-Za-z: the range A-z would also keep [ \ ] ^ _ and backtick.
    bad_chars = re.compile(r'[^A-Za-z\s\däöüÄÖÜß-]')
    # \b keeps "ft" inside words such as "Daft" or "after" untouched.
    featuring = re.compile(r'\s*\bft\b\.?[^\n]*')
    for i, raw in enumerate(strList):
        cleaned = bad_chars.sub("", str(raw))
        strList[i] = featuring.sub("", cleaned)
##------------------------------------------------------------------------------
## print music
##
## @param lists to print
#
def printMusic(interpreter_list, title_list):
    """Print one ``interpreter : title`` line per track.

    @param interpreter_list, list of artist names
    @param title_list, list of track titles, parallel to interpreter_list
    """
    for interpreter, title in zip(interpreter_list, title_list):
        print(interpreter + " : " + title)
##------------------------------------------------------------------------------
## parse html
##
## @param lists to write results to
#
def parseHtml(interpreter_list, title_list):
    """Append the interpreters and titles found on the page to the lists.

    @param interpreter_list, list that receives the text of every
           ``div.interpreter`` element
    @param title_list, list that receives the text of every ``div.title``
           element
    Raises Exception when the two element counts differ or when no
    interpreter element is found at all.
    """
    soup = getHtmlFromPage()
    interpreter = soup.find_all("div", {"class": "interpreter"})
    title = soup.find_all("div", {"class": "title"})
    if len(interpreter) != len(title):
        # The original message was missing the space between its two halves.
        raise Exception("The amount of interpreters don't correspond "
                        "to the amount of titles.")
    if not interpreter:
        raise Exception("No FM4 music playlist found in given url")
    interpreter_list.extend(tag.text for tag in interpreter)
    title_list.extend(tag.text for tag in title)
##------------------------------------------------------------------------------
## create Token with given credentials
##
## @return authentication token
#
def getToken():
    """Request a Spotify user token with the credentials from config.

    @return authentication token
    Raises Exception when no token is returned.
    """
    token = util.prompt_for_user_token(config.USERNAME, config.SCOPE,
                                       config.CLIENT_ID, config.CLIENT_SECRET,
                                       config.REDIRECT_URI)
    if not token:
        raise Exception("Could not get authentication token from spotify!")
    return token
##------------------------------------------------------------------------------
## search track and get spotify uri
##
## @param spotify_Obj, spotipy client used to run the search
## @param interpreter && title, strings containing track info
## @return uri string
#
def getUri(spotify_Obj, interpreter, title):
    """Search Spotify for a track and return the URI of the first hit.

    @param spotify_Obj, object whose search(q=...) returns the result dict
    @param interpreter, artist name used in the query
    @param title, track title used in the query
    @return URI string of the first track found, or None when the search
            returned nothing or no track matched
    """
    result = spotify_Obj.search(q=interpreter + ' ' + title)
    if not result:
        return None
    items = result['tracks']['items']
    if not items:
        return None
    return str(items[0]['uri'])
##------------------------------------------------------------------------------
## correct artist name and track title with lastFm api
##
## @param1 artist_name, name of artist to correct
## @param2 title_name, title name to correct
## @return track_corrected, corrected Track object
#
def getTrackInfo(artist_name, track_name):
    """Correct an artist name and track title with the Last.fm API.

    @param artist_name, name of artist to correct
    @param track_name, title of track to correct
    @return pylast.Track built from the corrected artist and title
    """
    last_Fm = getLastFmNetworkAuth()
    artist = last_Fm.get_artist(artist_name)
    # NOTE(review): get_correction() presumably yields None when Last.fm has
    # no correction - confirm; in that case keep the name we were given.
    artist_corrected_name = artist.get_correction() or artist_name
    track = last_Fm.get_track(artist_corrected_name, track_name)
    track_corrected_name = track.get_correction() or track_name
    return pylast.Track(artist_corrected_name, track_corrected_name, last_Fm)
##------------------------------------------------------------------------------
## get last fm network authentication
##
## @return network authentication token
#
def getLastFmNetworkAuth():
    """Create a Last.fm network object from the API key/secret in config.

    @return pylast.LastFMNetwork instance
    """
    return pylast.LastFMNetwork(config.LASTFM_API_KEY,
                                config.LASTFM_API_SECRET)
##------------------------------------------------------------------------------
## parse music items from website, put them into a list, sanitize lists,
## correct artist names and song titles with last.fm API and save list in a
## sqlite database for further usage
##
## @param song_list, list that receives a Song for every track found on Spotify
#
def parseTracksIntoSongClassList(song_list):
    """Scrape the playlist, correct the names and collect Spotify matches.

    Parses interpreters and titles from the website, sanitizes them,
    corrects them through getTrackInfo() and looks each track up on
    Spotify. Tracks without a Spotify URI are skipped.

    @param song_list, list that a Song is appended to for every track that
           was found on Spotify
    """
    interpreter_list = []
    title_list = []
    parseHtml(interpreter_list, title_list)
    print(datetime.datetime.now(), "Done parsing html")
    sanitize(interpreter_list)
    sanitize(title_list)
    sp = spotipy.Spotify(getToken())
    for artist_name, title_name in zip(interpreter_list, title_list):
        track_info = getTrackInfo(artist_name, title_name)
        title = str(track_info.get_name())
        artist = str(track_info.get_artist())
        # The corrected names are only used when they differ from each
        # other; str() never returns None, so no further None check is
        # needed. NOTE(review): a missing correction may surface here as the
        # literal string 'None' - confirm what pylast returns.
        if title != artist:
            title_name, artist_name = title, artist
        spotify_uri = getUri(sp, artist_name, title_name)
        if spotify_uri:
            song_list.append(Song(artist_name, title_name, str(spotify_uri)))
    print(datetime.datetime.now(), "Done parsing songs")
##------------------------------------------------------------------------------
## insert new songs to database, checks for duplicates and ignores them
##
## @param song_list, list containing songs which need to be inserted
## into database
#
def updateDatabase(song_list, database_name='SongDatabase.db'):
    """Insert the songs into the SQLite table ``songs``, ignoring duplicates.

    The table is created when it does not exist. Its UNIQUE constraint with
    ON CONFLICT IGNORE silently drops rows whose artist, title and URI are
    already stored. New rows get today's date and Uploaded = 0.

    @param song_list, list containing songs (objects with artist, song and
           spotify_uri attributes) which need to be inserted
    @param database_name, path of the SQLite file; defaults to the file the
           script has always used
    """
    # Store the date as text; the original computed this string but threw
    # it away and passed the date object instead.
    upload_date = datetime.date.today().strftime('%Y-%m-%d')
    rows = [(song.artist, song.song, song.spotify_uri, upload_date, 0)
            for song in song_list]
    conn = sqlite3.connect(database_name)
    try:
        # "with conn" commits on success and rolls back on error.
        with conn:
            conn.execute('''CREATE TABLE IF NOT EXISTS songs
                (SongID INTEGER PRIMARY KEY, artist_name TEXT, song_name TEXT,
                spotify_uri TEXT, UploadDate TIMESTAMP, Uploaded INTEGER,
                UNIQUE(artist_name, song_name, spotify_uri) ON CONFLICT IGNORE)''')
            conn.executemany('''INSERT INTO songs
                (artist_name, song_name, spotify_uri, UploadDate, Uploaded)
                VALUES (?,?,?,?,?)''', rows)
    finally:
        conn.close()
    print(datetime.datetime.now(), "Done updating Database")
##------------------------------------------------------------------------------
## copy Uris from song_list into new list
##
## @param song_list, list containing songs which get copied into new list
## @return track_list, list containing all song uris
#
def getUrisList(song_list):
    """Collect the Spotify URI of every song, in order.

    @param song_list, iterable of objects exposing a ``spotify_uri`` attribute
    @return new list containing one URI per song
    """
    uri_list = [song.spotify_uri for song in song_list]
    # The list is echoed to stdout, as before.
    print(uri_list)
    return uri_list
##------------------------------------------------------------------------------
## Main part of the program
## get html and parse important parts into file
#
if __name__ == '__main__':
    # list to fill with corrected songs
    song_list = []
    # parse songs into song_list
    parseTracksIntoSongClassList(song_list)
    # insert song_list into database
    updateDatabase(song_list)
dataManager.py
# -*- coding: utf-8 -*-
# import config file
import config
import sqlite3
import pandas as pd
# spotipy library
import spotipy
import spotipy.util as util
##------------------------------------------------------------------------------
## create Token with given credentials
##
## @return authentication token
#
def getToken():
    """Request a Spotify user token using the credentials from ``config``.

    @return whatever ``util.prompt_for_user_token`` yields; the value is
            passed through unchecked
    """
    credentials = (config.USERNAME, config.SCOPE, config.CLIENT_ID,
                   config.CLIENT_SECRET, config.REDIRECT_URI)
    return util.prompt_for_user_token(*credentials)
##------------------------------------------------------------------------------
## upload songs that are not yet marked as uploaded from the database to the
## spotify playlist and mark them as uploaded afterwards
##
## @raise Exception if there are no new songs or no token could be obtained
#
def uploadSongsToSpotify():
    """Upload every not-yet-uploaded song URI to the configured playlist.

    Reads the URIs of all rows with ``Uploaded = 0`` from 'SongDatabase.db',
    adds them to the playlist ``config.PLAYLIST_ID`` of ``config.USERNAME``
    and flags the rows as uploaded.

    @raise Exception if no token could be obtained or if the database holds
           no songs that still need uploading
    """
    # declare db name
    database_name = 'SongDatabase.db'
    # Spotify accepts at most 100 tracks per add-to-playlist request.
    batch_size = 100

    # Check the token itself: a spotipy.Spotify object is always truthy, so
    # testing the client would never detect a failed authentication.
    token = getToken()
    if not token:
        raise Exception("Could not get token from spotify API")
    sp = spotipy.Spotify(token)

    # spotify username
    username = config.USERNAME
    # spotify id of playlist
    playlist_id = config.PLAYLIST_ID

    conn = sqlite3.connect(database_name)
    try:
        c = conn.cursor()
        c.execute("""SELECT spotify_uri FROM songs WHERE (Uploaded = 0)""")
        # save uris in list, for spotipy
        uri_list = [str(row[0]) for row in c.fetchall()]
        print(uri_list)
        if not uri_list:
            raise Exception("There aren't any new songs in database, songs were already uploaded")
        for start in range(0, len(uri_list), batch_size):
            batch = uri_list[start:start + batch_size]
            sp.user_playlist_add_tracks(username, playlist_id, batch)
            # Flag only what was actually sent, and commit per batch, so a
            # failure half-way does not mark unsent songs as uploaded.
            c.executemany(
                """UPDATE songs SET Uploaded = 1
                   WHERE spotify_uri = ? AND Uploaded = 0""",
                [(uri,) for uri in batch])
            conn.commit()
    finally:
        # Release the database even when one of the exceptions above fires.
        conn.close()


if __name__ == '__main__':
    uploadSongsToSpotify()
object-oriented python-3.x
New contributor
$endgroup$
My Python program parses songs from a website, corrects song titles and artists with the Last.fm API, looks up each song's Spotify URI using the Spotify API, stores all the information in a SQLite database, and then uploads the songs to a Spotify playlist with the Spotify API.
I would like to make the program object-oriented and would appreciate some advice on how to do that.
Some general Python advice would also be useful.
Thanks in advance, guys!
I have a separate config.py file with all the needed API variables.
scraper.py
# -*- coding: utf-8 -*-
# import config file
import config
# import libraries
from bs4 import BeautifulSoup
import datetime
import urllib.request as urllib
import sys
import time
import re
import sqlite3
# webdriver libraries
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# spotipy library
import spotipy
import spotipy.util as util
# import pylast
import pylast
# song class holds information about each song
class Song:
    """Hold the artist, title and Spotify URI of a single track."""

    # Class-level defaults; every instance overrides them in __init__.
    artist = None
    song = None
    spotify_uri = None

    def __init__(self, artist, song, spotify_uri):
        self.artist = artist
        self.song = song
        self.spotify_uri = spotify_uri

    def __repr__(self):
        return 'Song(artist={!r}, song={!r}, spotify_uri={!r})'.format(
            self.artist, self.song, self.spotify_uri)

    def printSong(self):
        """Print the song as ``artist - title , Uri: uri``."""
        print(self.artist, '-', self.song, ', Uri:', self.spotify_uri)
##------------------------------------------------------------------------------
## Get Date of latest sunday
##
## @return formatted date of last sunday as yyyymmdd
#
def getSundayDate(today=None):
    """Return the most recent Sunday (today included) formatted as yyyymmdd.

    @param today, optional ``datetime.date`` to use as reference day;
           defaults to the current date
    @return date string of the latest Sunday, e.g. '20190120'
    """
    if today is None:
        today = datetime.date.today()
    # weekday() is 0 for Monday ... 6 for Sunday, so this yields 0 on a
    # Sunday, 1 on a Monday, ... 6 on a Saturday.
    days_since_sunday = (today.weekday() - 6) % 7
    latest_sunday = today - datetime.timedelta(days=days_since_sunday)
    return latest_sunday.strftime('%Y%m%d')
##------------------------------------------------------------------------------
## URL Pattern
##
## https://fm4.orf.at/player/20190120/SSU
## URL pattern:
## /yyyymmdd/SSU
## /20190120/SSU
## SSU is just Sunny Side Up the show from 10am till 1pm
## URL pattern changes every day, so the date part has to be updated every
## week to only get sundays
##
## @return concatenated URL of website
def getURLPattern():
    """Build the FM4 player URL of the latest Sunday's "SSU" broadcast.

    @return URL of the form https://fm4.orf.at/player/<yyyymmdd>/SSU
    """
    url_parts = ['https://fm4.orf.at/player/', getSundayDate(), '/SSU']
    return ''.join(url_parts)
##------------------------------------------------------------------------------
## Get html source from page specified by page_url
##
## @return html source as beautiful soup object
#
def getHtmlFromPage():
    """Render the playlist page in headless Firefox and return its HTML.

    @return html source of the page as BeautifulSoup object
    """
    page_URL = getURLPattern()
    options = Options()
    options.headless = True
    profile = webdriver.FirefoxProfile()
    # Scale media volume to zero for the browser session.
    profile.set_preference("media.volume_scale", "0.0")
    driver = webdriver.Firefox(options=options, firefox_profile=profile)
    try:
        driver.get(page_URL)
        wait = WebDriverWait(driver, 3)
        wait.until(EC.presence_of_element_located((By.CLASS_NAME,
                                                   'broadcast-items-list')))
        # NOTE(review): presumably gives the page time to fill the list
        # after the container appears - confirm this pause is still needed.
        time.sleep(1)
        return BeautifulSoup(driver.page_source, "html.parser")
    finally:
        # Always shut the browser down, including when the wait times out;
        # otherwise a headless Firefox process is left behind.
        driver.quit()
##------------------------------------------------------------------------------
## remove bad characters from list
##
## @param list, list with elements to check
#
def sanitize(strList):
    """Strip unwanted characters and "ft ..." suffixes from every element.

    The list is modified in place; nothing is returned. Elements are
    converted with ``str()`` before cleaning.

    @param strList, list with elements to clean
    """
    # NOTE(review): the patterns as found had lost their backslashes
    # ("A-zsd" is redundant as written, since s and d already lie inside
    # A-z), which made the first pattern delete spaces and digits. They are
    # restored here as \s \d \n \w - confirm against the author's intent.
    regex_remove = re.compile(r'([^A-z\s\däöüÄÖÜß-][\^]?)')
    # \b keeps "ft" from matching inside words such as "Daft" or
    # "Kraftwerk", which would otherwise truncate the name.
    regex_ft = re.compile(r'(\bft\.?([^\n]\s?\w*)+)')
    for index, element in enumerate(strList):
        cleaned = regex_remove.sub("", str(element))
        strList[index] = regex_ft.sub("", cleaned)
##------------------------------------------------------------------------------
## print music
##
## @param lists to print
#
def printMusic(interpreter_list, title_list):
    """Print one "interpreter : title" line per entry of interpreter_list.

    @param interpreter_list, list of interpreter names
    @param title_list, list of titles; must be at least as long as
           interpreter_list
    """
    for index, interpreter in enumerate(interpreter_list):
        print(interpreter + " : " + title_list[index])
##------------------------------------------------------------------------------
## parse html
##
## @param lists to write results to
#
def parseHtml(interpreter_list, title_list):
    """Extract interpreters and titles from the playlist page.

    The text of every ``div.interpreter`` and ``div.title`` element is
    appended to the two lists passed in.

    @param interpreter_list, list the interpreter names are appended to
    @param title_list, list the titles are appended to
    @raise Exception if the page holds no playlist entries or the numbers of
           interpreters and titles differ
    """
    soup = getHtmlFromPage()
    # find all interpreter in playlist
    interpreters = soup.find_all("div", {"class": "interpreter"})
    # find all titles in playlist
    titles = soup.find_all("div", {"class": "title"})
    # Check for errors
    if len(interpreters) != len(titles):
        # The trailing space was missing, yielding "correspondto".
        raise Exception("The amount of interpreters don't correspond " +
                        "to the amount of titles.")
    if not interpreters:
        raise Exception("No FM4 music playlist found in given url")
    # Lengths are equal at this point, so zip() drops nothing.
    for interpreter, title in zip(interpreters, titles):
        interpreter_list.append(interpreter.text)
        title_list.append(title.text)
##------------------------------------------------------------------------------
## create Token with given credentials
##
## @return authentication token
#
def getToken():
    """Request a Spotify user token using the credentials from ``config``.

    @return authentication token
    @raise Exception if no token was obtained
    """
    credentials = (config.USERNAME, config.SCOPE, config.CLIENT_ID,
                   config.CLIENT_SECRET, config.REDIRECT_URI)
    token = util.prompt_for_user_token(*credentials)
    if not token:
        raise Exception("Could not get authentication token from spotify!")
    return token
##------------------------------------------------------------------------------
## search track and get spotify uri
##
## @param spotify_Obj, spotipy client used to run the search
## @param interpreter && title, strings containing track info
## @return uri string
#
def getUri(spotify_Obj, interpreter, title):
    """Search Spotify for a track and return the URI of the first hit.

    @param spotify_Obj, client object offering ``search(q=...)``
    @param interpreter, artist name used in the query
    @param title, track title used in the query
    @return URI string of the first search result, or None if the search
            returned nothing
    """
    result = spotify_Obj.search(q=interpreter + ' ' + title)
    if result is None:
        return None
    items = result['tracks']['items']
    if not items:
        return None
    return str(items[0]['uri'])
##------------------------------------------------------------------------------
## correct artist name and track title with lastFm api
##
## @param1 artist_name, name of artist to correct
## @param2 title_name, title name to correct
## @return track_corrected, corrected Track object
#
def getTrackInfo(artist_name, track_name):
    """Correct an artist name and track title with the Last.fm API.

    @param artist_name, name of artist to correct
    @param track_name, title name to correct
    @return pylast.Track built from the corrected artist and title
    """
    # network authentication
    last_Fm = getLastFmNetworkAuth()
    # NOTE(review): get_correction() presumably yields nothing when Last.fm
    # has no entry for the name - confirm. In that case keep the name we
    # were given instead of passing an empty value on.
    artist_corrected_name = (last_Fm.get_artist(artist_name).get_correction()
                             or artist_name)
    track = last_Fm.get_track(artist_corrected_name, track_name)
    track_corrected_name = track.get_correction() or track_name
    return pylast.Track(artist_corrected_name, track_corrected_name, last_Fm)
##------------------------------------------------------------------------------
## get last fm network authentication
##
## @return network authentication token
#
def getLastFmNetworkAuth():
    """Create a Last.fm network object from the API key and secret in config.

    @return pylast.LastFMNetwork instance
    """
    return pylast.LastFMNetwork(config.LASTFM_API_KEY,
                                config.LASTFM_API_SECRET)
##------------------------------------------------------------------------------
## parse music items from website, put them into a list, sanitize lists,
## correct artist names and song titles with last.fm API and save list in a
## sqlite database for further usage
##
## @param song_list, list which gets filled with the parsed Song objects
#
def parseTracksIntoSongClassList(song_list):
    """Scrape the playlist and append a Song for every track found on Spotify.

    Steps: parse the page, sanitize the names, correct them via Last.fm,
    look up the Spotify URI. Tracks without a URI are skipped.

    @param song_list, list the resulting Song objects are appended to
    """
    # lists containing the interpreter and title
    interpreter_list = []
    title_list = []
    # fill lists with results
    parseHtml(interpreter_list, title_list)
    print(datetime.datetime.now(), "Done parsing html")
    # remove bad characters from lists
    sanitize(interpreter_list)
    sanitize(title_list)
    # get Token and create spotify object
    sp = spotipy.Spotify(getToken())
    # parseHtml() only returns lists of equal length, so zip() drops nothing.
    for interpreter, title in zip(interpreter_list, title_list):
        track_info = getTrackInfo(interpreter, title)
        corrected_title = str(track_info.get_name())
        corrected_artist = str(track_info.get_artist())
        # The corrections are taken only when title and artist differ;
        # otherwise the scraped names are kept. The former "is not None"
        # checks could never fail after str() and were dropped.
        # NOTE(review): a missing correction would arrive here as the text
        # "None" - confirm whether that case needs separate handling.
        if corrected_title != corrected_artist:
            title = corrected_title
            interpreter = corrected_artist
        # get spotify uri for song
        spotify_uri = getUri(sp, interpreter, title)
        if spotify_uri:
            song_list.append(Song(interpreter, title, str(spotify_uri)))
    print(datetime.datetime.now(), "Done parsing songs")
##------------------------------------------------------------------------------
## insert new songs to database, checks for duplicates and ignores them
##
## @param song_list, list containing songs which need to be inserted
## into database
#
def updateDatabase(song_list, database_name='SongDatabase.db'):
    """Insert the given songs into the ``songs`` table, skipping duplicates.

    The table is created on first use. Its UNIQUE constraint on
    (artist_name, song_name, spotify_uri) is declared with
    ``ON CONFLICT IGNORE``, so re-inserting a known song is a silent no-op.

    @param song_list, iterable of objects exposing ``artist``, ``song`` and
           ``spotify_uri`` attributes
    @param database_name, path of the SQLite file (defaults to the
           previously hard-coded 'SongDatabase.db')
    """
    # Store the date as explicit ISO text instead of relying on sqlite3's
    # implicit date adapter; the stored value ('YYYY-MM-DD') is the same.
    upload_date = datetime.date.today().strftime('%Y-%m-%d')
    rows = [(song.artist, song.song, song.spotify_uri, upload_date, 0)
            for song in song_list]

    conn = sqlite3.connect(database_name)
    try:
        c = conn.cursor()
        c.execute('''CREATE TABLE IF NOT EXISTS songs
            (SongID INTEGER PRIMARY KEY, artist_name TEXT, song_name TEXT,
            spotify_uri TEXT, UploadDate TIMESTAMP, Uploaded INTEGER,
            UNIQUE(artist_name, song_name, spotify_uri) ON CONFLICT IGNORE)''')
        c.executemany('''INSERT INTO songs
            (artist_name, song_name, spotify_uri, UploadDate, Uploaded)
            VALUES (?,?,?,?,?)''', rows)
        conn.commit()
        c.close()
    finally:
        # Closing the cursor alone leaves the connection (and file) open.
        conn.close()
    print(datetime.datetime.now(), "Done updating Database")
##------------------------------------------------------------------------------
## copy Uris from song_list into new list
##
## @param song_list, list containing songs which get copied into new list
## @return track_list, list containing all song uris
#
def getUrisList(song_list):
    """Collect the Spotify URI of every song, in order.

    @param song_list, iterable of objects exposing a ``spotify_uri`` attribute
    @return new list containing one URI per song
    """
    uri_list = [song.spotify_uri for song in song_list]
    # The list is echoed to stdout, as before.
    print(uri_list)
    return uri_list
##------------------------------------------------------------------------------
## Main part of the program
## get html and parse important parts into file
#
if __name__ == '__main__':
    # list to fill with corrected songs
    song_list = []
    # parse songs into song_list
    parseTracksIntoSongClassList(song_list)
    # insert song_list into database
    updateDatabase(song_list)
dataManager.py
# -*- coding: utf-8 -*-
# import config file
import config
import sqlite3
import pandas as pd
# spotipy library
import spotipy
import spotipy.util as util
##------------------------------------------------------------------------------
## create Token with given credentials
##
## @return authentication token
#
def getToken():
    """Request a Spotify user token using the credentials from ``config``.

    @return whatever ``util.prompt_for_user_token`` yields; the value is
            passed through unchecked
    """
    credentials = (config.USERNAME, config.SCOPE, config.CLIENT_ID,
                   config.CLIENT_SECRET, config.REDIRECT_URI)
    return util.prompt_for_user_token(*credentials)
##------------------------------------------------------------------------------
## upload songs that are not yet marked as uploaded from the database to the
## spotify playlist and mark them as uploaded afterwards
##
## @raise Exception if there are no new songs or no token could be obtained
#
def uploadSongsToSpotify():
    """Upload every not-yet-uploaded song URI to the configured playlist.

    Reads the URIs of all rows with ``Uploaded = 0`` from 'SongDatabase.db',
    adds them to the playlist ``config.PLAYLIST_ID`` of ``config.USERNAME``
    and flags the rows as uploaded.

    @raise Exception if no token could be obtained or if the database holds
           no songs that still need uploading
    """
    # declare db name
    database_name = 'SongDatabase.db'
    # Spotify accepts at most 100 tracks per add-to-playlist request.
    batch_size = 100

    # Check the token itself: a spotipy.Spotify object is always truthy, so
    # testing the client would never detect a failed authentication.
    token = getToken()
    if not token:
        raise Exception("Could not get token from spotify API")
    sp = spotipy.Spotify(token)

    # spotify username
    username = config.USERNAME
    # spotify id of playlist
    playlist_id = config.PLAYLIST_ID

    conn = sqlite3.connect(database_name)
    try:
        c = conn.cursor()
        c.execute("""SELECT spotify_uri FROM songs WHERE (Uploaded = 0)""")
        # save uris in list, for spotipy
        uri_list = [str(row[0]) for row in c.fetchall()]
        print(uri_list)
        if not uri_list:
            raise Exception("There aren't any new songs in database, songs were already uploaded")
        for start in range(0, len(uri_list), batch_size):
            batch = uri_list[start:start + batch_size]
            sp.user_playlist_add_tracks(username, playlist_id, batch)
            # Flag only what was actually sent, and commit per batch, so a
            # failure half-way does not mark unsent songs as uploaded.
            c.executemany(
                """UPDATE songs SET Uploaded = 1
                   WHERE spotify_uri = ? AND Uploaded = 0""",
                [(uri,) for uri in batch])
            conn.commit()
    finally:
        # Release the database even when one of the exceptions above fires.
        conn.close()


if __name__ == '__main__':
    uploadSongsToSpotify()
object-oriented python-3.x
object-oriented python-3.x
New contributor
New contributor
New contributor
asked 3 mins ago
wertherwerther
11
11
New contributor
New contributor
add a comment |
add a comment |
0
active
oldest
votes
Your Answer
StackExchange.ifUsing("editor", function () {
return StackExchange.using("mathjaxEditing", function () {
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
});
});
}, "mathjax-editing");
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");
// Page bootstrap: set up tag rendering, then create the answer editor once
// the external editor module (and, if enabled, the snippets module) loaded.
StackExchange.ready(function() {
    var channelOptions = {
        tags: "".split(" "),
        id: "196"
    };
    initTagRenderer("".split(" "), "".split(" "), channelOptions);

    StackExchange.using("externalEditor", function() {
        // Have to fire editor after snippets, if snippets enabled
        if (StackExchange.settings.snippets.snippetsEnabled) {
            StackExchange.using("snippets", function() {
                createEditor();
            });
        }
        else {
            createEditor();
        }
    });

    // Configure the answer editor. The two HTML strings below had lost
    // their \u003c / \u003e and \" escapes, which left unescaped double
    // quotes inside the literals (a syntax error); they are restored here.
    function createEditor() {
        StackExchange.prepareEditor({
            heartbeatType: 'answer',
            autoActivateHeartbeat: false,
            convertImagesToLinks: false,
            noModals: true,
            showLowRepImageUploadWarning: true,
            reputationToPostImages: null,
            bindNavPrevention: true,
            postfix: "",
            imageUploader: {
                brandingHtml: "Powered by \u003ca class=\"icon-imgur-white\" href=\"https://imgur.com/\"\u003e\u003c/a\u003e",
                contentPolicyHtml: "User contributions licensed under \u003ca href=\"https://creativecommons.org/licenses/by-sa/3.0/\"\u003ecc by-sa 3.0 with attribution required\u003c/a\u003e \u003ca href=\"https://stackoverflow.com/legal/content-policy\"\u003e(content policy)\u003c/a\u003e",
                allowUrls: true
            },
            onDemand: true,
            discardSelector: ".discard-answer"
            ,immediatelyShowMarkdownHelp:true
        });
    }
});
werther is a new contributor. Be nice, and check out our Code of Conduct.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f212276%2fhow-can-i-make-this-python-code-more-object-oriented-uploading-songs-from-web%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
0
active
oldest
votes
0
active
oldest
votes
active
oldest
votes
active
oldest
votes
werther is a new contributor. Be nice, and check out our Code of Conduct.
werther is a new contributor. Be nice, and check out our Code of Conduct.
werther is a new contributor. Be nice, and check out our Code of Conduct.
werther is a new contributor. Be nice, and check out our Code of Conduct.
Thanks for contributing an answer to Code Review Stack Exchange!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
Use MathJax to format equations. MathJax reference.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f212276%2fhow-can-i-make-this-python-code-more-object-oriented-uploading-songs-from-web%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown