[DE 프로젝트: 음악 추천 챗봇 'Sixpotify'] 7. 페이스북 챗봇

아티스트 유사도

  • Artist track들의 audio feature 데이터에 대해 평균을 낸 값을 사용하여 Artist 끼리의 유사도를 계산한다.
  • 아테나(Athena)에 미리 만들어놓았던 두가지 top_tracks와 audio_features 테이블을 이용하여 유사도를 구하고 해당 유사도를 PostgreSQL 데이터베이스에 삽입한다.

아테나(Athena) 연동

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
import sys
import os
import logging
import psycopg2
import boto3
import time
import math

host = ""
port = 5432
username = ""
database = ""
password = ""

def main():

    try:
        conn = psycopg2.connect(
        host=host,
        database=database,
        user=username,
        password=password)
        cursor = conn.cursor()
    except:
        logging.error("could not connect to rds")
        sys.exit(1)

    # Create Table
    # cursor.execute("""CREATE TABLE related_artists (
    #     artist_id VARCHAR(255) PRIMARY KEY,
    #     y_artist VARCHAR(255),
    #     distance FLOAT)
    # """)
    # conn.commit()
    # sys.exit()

    athena = boto3.client('athena')

    query = """
        SELECT
         artist_id,
         AVG(danceability) AS danceability,
         AVG(energy) AS energy,
         AVG(loudness) AS loudness,
         AVG(speechiness) AS speechiness,
         AVG(acousticness) AS acousticness,
         AVG(instrumentalness) AS instrumentalness
        FROM
         top_tracks t1
        JOIN
         audio_features t2 ON t2.id = t1.id AND CAST(t1.dt AS DATE) = DATE('2021-10-17') AND CAST(t2.dt AS DATE) = DATE('2021-10-17')
        GROUP BY t1.artist_id
        LIMIT 20
    """

    r = query_athena(query, athena)
    results = get_query_result(r['QueryExecutionId'], athena)
    artists = process_data(results)
    

    query = """
        SELECT
         MIN(danceability) AS danceability_min,
         MAX(danceability) AS danceability_max,
         MIN(energy) AS energy_min,
         MAX(energy) AS energy_max,
         MIN(loudness) AS loudness_min,
         MAX(loudness) AS loudness_max,
         MIN(speechiness) AS speechiness_min,
         MAX(speechiness) AS speechiness_max,
         ROUND(MIN(acousticness),4) AS acousticness_min,
         MAX(acousticness) AS acousticness_max,
         MIN(instrumentalness) AS instrumentalness_min,
         MAX(instrumentalness) AS instrumentalness_max
        FROM
         audio_features
    """
    r = query_athena(query, athena)
    results = get_query_result(r['QueryExecutionId'], athena)
    avgs = process_data(results)[0]

    metrics = ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness']
    
    for i in artists:
        for j in artists:
            dist = 0
            for k in metrics:
                x = float(i[k])
                x_norm = normalize(x, float(avgs[k+'_min']), float(avgs[k+'_max']))
                y = float(j[k])
                y_norm = normalize(y, float(avgs[k+'_min']), float(avgs[k+'_max']))
                dist += (x_norm-y_norm)**2

            dist = math.sqrt(dist) ## euclidean distance

            data = {
                'artist_id': i['artist_id'],
                'y_artist': j['artist_id'],
                'distance': dist
            }

            insert_row(cursor, data, 'related_artists')


    conn.commit()
    cursor.close()


def normalize(x, x_min, x_max):

    normalized = (x-x_min) / (x_max-x_min)

    return normalized


def query_athena(query, athena):
    response = athena.start_query_execution(
        QueryString=query,
        QueryExecutionContext={
            'Database': 'default'
        },
        ResultConfiguration={
            'OutputLocation': "s3://6mini-spotify/athena-panomix-tables/",
            'EncryptionConfiguration': {
                'EncryptionOption': 'SSE_S3'
            }
        }
    )

    return response


def get_query_result(query_id, athena):

    response = athena.get_query_execution(
        QueryExecutionId=str(query_id)
    )

    while response['QueryExecution']['Status']['State'] != 'SUCCEEDED':
        if response['QueryExecution']['Status']['State'] == 'FAILED':
            logging.error('QUERY FAILED')
            break
        time.sleep(5)
        response = athena.get_query_execution(
            QueryExecutionId=str(query_id)
        )

    response = athena.get_query_results(
        QueryExecutionId=str(query_id),
        MaxResults=1000
    )

    return response


def process_data(results):

    columns = [col['Label'] for col in results['ResultSet']['ResultSetMetadata']['ColumnInfo']]

    listed_results = []
    for res in results['ResultSet']['Rows'][1:]:
        values = []
        for field in res['Data']:
            try:
                values.append(list(field.values())[0])
            except:
                values.append(list(' '))
        listed_results.append(dict(zip(columns, values)))

    return listed_results


def insert_row(cursor, data, table):
    
    placeholders = ', '.join(['%s'] * len(data))
    columns = ', '.join(data.keys())
    key_placeholders = ', '.join(['{0}=%s'.format(k) for k in data.keys()])
    sql = "INSERT INTO %s ( %s ) VALUES ( %s ) ON CONFLICT ( %s ) DO UPDATE SET  %s" % (table, columns, placeholders, list(data.keys())[0] ,key_placeholders)
    cursor.execute(sql, list(data.values())*2)


if __name__=='__main__':
    main()
artist_id y_artist distance
0LcJLqbBmaGUft1e9Mm8HV 7A9yZMTrFZcgEWAX2kBfK6 0.27980773482656995
1ZwdS5xdxEREPySFridCfh 7fIvjotigTGWqjIz6EP1i4 0.2882160292783355
3EhbVgyfGd7HkpsagwL9GS 7A9yZMTrFZcgEWAX2kBfK6 0.26411908256068184
7xkAwz0bQTGDSbkofyQt3U 7fIvjotigTGWqjIz6EP1i4 0.186260244237497
6Q192DXotxtaysaqNPy5yR 7A9yZMTrFZcgEWAX2kBfK6 0.3029041230758847
45lorWzrKLxfKlWpV7r9CN 7fIvjotigTGWqjIz6EP1i4 0.38931553129958446
0WgyCbru4tXnMsbTmX4mFw 7A9yZMTrFZcgEWAX2kBfK6 0.38289397784182017
1By9QBFnjZAoI83BZppHlt 7fIvjotigTGWqjIz6EP1i4 0.16577755424396057
4pJCawaKSZ40EnxN0YEYw3 7A9yZMTrFZcgEWAX2kBfK6 0.16981935349152513
6O74knDqdv3XaWtkII7Xjp 7fIvjotigTGWqjIz6EP1i4 0.2553551414854985
3rfgbfpPSfXY40lzRK7Syt 7A9yZMTrFZcgEWAX2kBfK6 0.11591289512496193

페이스북 메신저 API

람다 함수(Lambda Func) 생성

image

API 게이트웨이(Gateway) 생성

image

메소드(method) 설정

image

image

맵핑 템플릿(mapping template)

image

배포(Deploy)

image

URL 복사

image

토큰 생성

image

S3 버킷 생성

image

구조

1
2
3
4
5
6
7
chatbot
├── libs
├── psycopg2
├── deploy.sh
├── fb_bot.py
├── lambda_handler.py
└── requirements.txt
  • psycopg가 lambda용이 따로 있기 때문에 이번엔 모든 라이브러리를 설치한 상태에서 zip하여 보내준다.

deploy.sh

1
2
3
4
5
6
7
8
#!/bin/bash

rm *.zip
zip spotify.zip -r *

aws s3 rm s3://sixpotify/spotify.zip
aws s3 cp ./spotify.zip s3://sixpotify/spotify.zip
aws lambda update-function-code --function-name sixpotify --s3-bucket sixpotify --s3-key spotify.zip

requirements.txt

1
requests

fb_bot.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
#!/usr/bin/env python

import sys
sys.path.append("./libs")
import os
import requests
import base64
import json
import logging
from enum import Enum

DEFAULT_API_VERSION = 4.0

## messaging types: "RESPONSE", "UPDATE", "MESSAGE_TAG"

class NotificationType(Enum):
    regular = "REGULAR"
    silent_push = "SILENT_PUSH"
    no_push = "no_push"

class Bot:

    def __init__(self, access_token, **kwargs):

        self.access_token = access_token
        self.api_version = kwargs.get('api_version') or DEFAULT_API_VERSION
        self.graph_url = 'https://graph.facebook.com/v{0}'.format(self.api_version)

    @property
    def auth_args(self):
        if not hasattr(self, '_auth_args'):
            auth = {
                'access_token': self.access_token
            }
            self._auth_args = auth
        return self._auth_args

    def send_message(self, recipient_id, payload, notification_type, messaging_type, tag):

        payload['recipient'] = {
            'id': recipient_id
        }

        #payload['notification_type'] = notification_type
        payload['messaging_type'] = messaging_type

        if tag is not None:
            payload['tag'] = tag

        request_endpoint = '{0}/me/messages'.format(self.graph_url)

        response = requests.post(
            request_endpoint,
            params = self.auth_args,
            json = payload
        )

        logging.info(payload)
        return response.json()

    def send_text(self, recipient_id, text, notification_type = NotificationType.regular, messaging_type = 'RESPONSE', tag = None):

        return self.send_message(
            recipient_id,
            {
                "message": {
                    "text": text
                }
            },
            notification_type,
            messaging_type,
            tag
        )

    def send_quick_replies(self, recipient_id, text, quick_replies, notification_type = NotificationType.regular, messaging_type = 'RESPONSE', tag = None):

        return self.send_message(
            recipient_id,
            {
                "message":{
                    "text": text,
                    "quick_replies": quick_replies
                }
            },
            notification_type,
            messaging_type,
            tag
        )

    def send_attachment(self, recipient_id, attachment_type, payload, notification_type = NotificationType.regular, messaging_type = 'RESPONSE', tag = None):

        return self.send_message(
            recipient_id,
            {
                "message": {
                    "attachment":{
                        "type": attachment_type,
                        "payload": payload
                    }
                }
            },
            notification_type,
            messaging_type,
            tag
        )

    def send_action(self, recipient_id, action, notification_type = NotificationType.regular, messaging_type = 'RESPONSE', tag = None):

        return self.send_message(
            recipient_id,
            {
                "sender_action": action
            },
            notification_type,
            messaging_type,
            tag
        )

    def whitelist_domain(self, domain_list, domain_action_type):

        payload = {
            "setting_type": "domain_whitelisting",
            "whitelisted_domains": domain_list,
            "domain_action_type": domain_action_type
        }

        request_endpoint = '{0}/me/thread_settings'.format(self.graph_url)

        response = requests.post(
            request_endpoint,
            params = self.auth_args,
            json = payload
        )

        return response.json()

    def set_greeting(self, template):

        request_endpoint = '{0}/me/thread_settings'.format(self.graph_url)

        response = requests.post(
            request_endpoint,
            params = self.auth_args,
            json = {
                "setting_type": "greeting",
                "greeting": {
                    "text": template
                }
            }
        )

        return response

    def set_get_started(self, text):

        request_endpoint = '{0}/me/messenger_profile'.format(self.graph_url)

        response = requests.post(
            request_endpoint,
            params = self.auth_args,
            json = {
                "get_started":{
                    "payload": text
                }
            }
        )

        return response

    def get_get_started(self):

        request_endpoint = '{0}/me/messenger_profile?fields=get_started'.format(self.graph_url)

        response = requests.get(
            request_endpoint,
            params = self.auth_args
        )

        return response

    def get_messenger_profile(self, field):

        request_endpoint = '{0}/me/messenger_profile?fields={1}'.format(self.graph_url, field)

        response = requests.get(
            request_endpoint,
            params = self.auth_args
        )

        return response


    def upload_attachment(self, url):

        request_endpoint = '{0}/me/message_attachments'.format(self.graph_url)

        response = requests.post(
            request_endpoint,
            params = self.auth_args,
            json = {
                "message":{
                    "attachment":{
                        "type": "image",
                        "payload": {
                            "is_reusable": True,
                            "url": url
                        }
                    }
                }
            }
        )

        return response

lambda_handler.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
# -*- coding: utf-8 -*-
import sys
sys.path.append('./libs')
import logging
import requests
import psycopg2
import fb_bot
import json
import base64
import boto3

logger = logging.getLogger()
logger.setLevel(logging.INFO)

client_id = ""
client_secret = ""

PAGE_TOKEN = ""
VERIFY_TOKEN = ""

host = ""
port = 5432
username = ""
database = "postgres"
password = ""

try:
    conn = psycopg2.connect(
    host=host,
    database=database,
    user=username,
    password=password)
    cursor = conn.cursor()
except:
    logging.error("could not connect to rds")
    sys.exit(1)

bot = fb_bot.Bot(PAGE_TOKEN)


def lambda_handler(event, context):

    # event['params'] only exists for HTTPS GET

    if 'params' in event.keys():

        if event['params']['querystring']['hub.verify_token'] == VERIFY_TOKEN:
            return int(event['params']['querystring']['hub.challenge'])
        else:
            logging.error('wrong validation token')
            raise SystemExit
    else:
        messaging = event['entry'][0]['messaging'][0]
        user_id = messaging['sender']['id']

        logger.info(messaging)
        artist_name = messaging['message']['text']

        query = "SELECT image_url, url FROM artists WHERE name = '{}'".format(artist_name)
        try:
            cursor.execute(query)
            raw = cursor.fetchall()
            if len(raw) == 0:
                sys.exit(0)
        except:
            text = search_artist(cursor, artist_name)
            bot.send_text(user_id, text)
            sys.exit(0)

        image_url, url = raw[0]

        payload = {
            'template_type': 'generic',
            'elements': [
                {
                    'title': "Artist Info: '{}'".format(artist_name),
                    'image_url': image_url,
                    'subtitle': 'information',
                    'default_action': {
                        'type': 'web_url',
                        'url': url,
                        'webview_height_ratio': 'full'
                    }
                }
            ]
        }

        bot.send_attachment(user_id, "template", payload)

        query = "SELECT t2.genre FROM artists t1 JOIN artist_genres t2 ON t2.artist_id = t1.id WHERE t1.name = '{}'".format(artist_name)

        cursor.execute(query)
        genres = []
        for (genre, ) in cursor.fetchall():
            genres.append(genre)

        text = "Here are genres of {}".format(artist_name)
        bot.send_text(user_id, text)
        bot.send_text(user_id, ', '.join(genres))


        ## 만약에 아티스트가 없을시에는 아티스트 추가

        ## Spotify API hit --> Artist Search
        ## Database Upload
        ## One second
        ## 오타 및 아티스트가 아닐 경우


def get_headers(client_id, client_secret):

    endpoint = "https://accounts.spotify.com/api/token"
    encoded = base64.b64encode("{}:{}".format(client_id, client_secret).encode('utf-8')).decode('ascii')

    headers = {
        "Authorization": "Basic {}".format(encoded)
    }

    payload = {
        "grant_type": "client_credentials"
    }

    r = requests.post(endpoint, data=payload, headers=headers)

    access_token = json.loads(r.text)['access_token']

    headers = {
        "Authorization": "Bearer {}".format(access_token)
    }

    return headers


def insert_row(cursor, data, table):
    
    placeholders = ', '.join(['%s'] * len(data))
    columns = ', '.join(data.keys())
    key_placeholders = ', '.join(['{0}=%s'.format(k) for k in data.keys()])
    sql = "INSERT INTO %s ( %s ) VALUES ( %s ) ON CONFLICT ( %s ) DO UPDATE SET  %s" % (table, columns, placeholders, list(data.keys())[0] ,key_placeholders)
    cursor.execute("ROLLBACK")
    conn.commit()
    cursor.execute(sql, list(data.values())*2)

def invoke_lambda(fxn_name, payload, invocation_type='Event'):

    lambda_client = boto3.client('lambda')

    invoke_response = lambda_client.invoke(
        FunctionName = fxn_name,
        InvocationType = invocation_type,
        Payload = json.dumps(payload)
    )

    if invoke_response['StatusCode'] not in [200, 202, 204]:
        logging.error("ERROR: Invoking lmabda function: '{0}' failed".format(fxn_name))


    return invoke_response


def search_artist(cursor, artist_name):

    headers = get_headers(client_id, client_secret)

    ## Spotify Search API
    params = {
        "q": artist_name,
        "type": "artist",
        "limit": "1"
    }

    r = requests.get("https://api.spotify.com/v1/search", params=params, headers=headers)

    raw = json.loads(r.text)

    if raw['artists']['items'] == []:
        return "Could not find artist. Please Try Again!"

    artist = {}
    artist_raw = raw['artists']['items'][0]
    if artist_raw['name'] == artist_name:

        artist.update(
            {
                'id': artist_raw['id'],
                'name': artist_raw['name'],
                'followers': artist_raw['followers']['total'],
                'popularity': artist_raw['popularity'],
                'url': artist_raw['external_urls']['spotify'],
                'image_url': artist_raw['images'][0]['url']
            }
        )
        insert_row(cursor, artist, 'artists')
        conn.commit()
        for i in artist_raw['genres']:
            if len(artist_raw['genres']) != 0:
                insert_row(cursor, {'artist_id': artist_raw['id'], 'genre': i}, 'artist_genre')

        
        conn.commit()
        

        r = invoke_lambda('top-tracks', payload={'artist_id': artist_raw['id']})
        print(r)
        return "We added artist. Please try again in a second!"

        

    return "Could not find artist. Please Try Again!"

배포

1
2
3
$ chmod +x deploy.sh

$ ./deploy.sh

Facebook API 필드 추가

image

IAM 퍼미션(Permissions) 추가

image

테스트

  • 기존에 있는 아티스트를 넣으면 잘 가져온다.

image

  • 존재하지 않는 아티스트를 넣으면 Postgres와 DynamoDB에 자동으로 추가되며, ‘We added artist. Please try again in a second!’와 함께 다시 검색하면 전시된다.

image

  • 람다 핸들러를 수정하여, 가수 이름을 검색했을 때 그 가수의 장르를 전시하고 유사한 가수를 추천하는 서비스를 완성시킨다.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
        query4 = "SELECT id url FROM artists WHERE name = '{}'".format(artist_name)

        cursor.execute(query4)
        raw4 = cursor.fetchall()
        id, = raw4[0]

        query2 = "SELECT y_artist FROM related_artists ra WHERE artist_id = '{}'".format(id)

        cursor.execute(query2)
        raw2 = cursor.fetchall()
        yid, = raw2[0]

        query3 = "SELECT name, image_url, url FROM artists WHERE id = '{}'".format(yid)

        cursor.execute(query3)
        raw3 = cursor.fetchall()
        name, image_url2, url2 = raw3[0]

        payload2 = {
            'template_type': 'generic',
            'elements': [
                {
                    'title': "유사한 가수: '{}'".format(name),
                    'image_url': image_url2,
                    'subtitle': 'information',
                    'default_action': {
                        'type': 'web_url',
                        'url': url2,
                        'webview_height_ratio': 'full'
                    }
                }
            ]
        }

        query = "SELECT t2.genre FROM artists t1 JOIN artist_genre t2 ON t2.artist_id = t1.id WHERE t1.name = '{}'".format(artist_name)

        cursor.execute(query)
        genres = []
        for (genre, ) in cursor.fetchall():
            genres.append(genre)

        text = "{}의 장르는?".format(artist_name)
        bot.send_text(user_id, text)
        bot.send_text(user_id, ', '.join(genres))
0%