[DE 프로젝트: 음악 추천 챗봇 'Sixpotify'] 2. AWS RDS PostgreSQL

데이터베이스

데이터 타입

스크린샷 2021-10-03 13 54 48

뉴메릭(Numeric)

스크린샷 2021-10-03 13 55 50

날짜 및 시간(Date and Time)

스크린샷 2021-10-03 13 56 27

캐릭터(Character)

스크린샷 2021-10-03 13 57 31

PostgreSQL

AWS RDS 연동

DB 생성

  • PostgreSQL 12.8 버전과 프리 티어를 선택한다.

스크린샷 2021-10-04 03 44 53

  • ‘Public Access’를 허용한다.

스크린샷 2021-10-04 03 45 56

VPC 보안 그룹 설정

  • 인바운드 규칙을 다음과 같이 추가한다.

스크린샷 2021-10-04 03 47 04

데이터 모델

심볼과 표기법

스크린샷 2021-10-03 22 18 56

프로젝트 스키마 계획

스크린샷 2021-10-03 22 29 24

API에서 DB로 저장

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
import sys
import requests
import base64
import json
import logging
import psycopg2
import time
import csv


# Spotify application credentials, passed to get_headers(); fill in before running.
client_id, client_secret = "", ""

# PostgreSQL connection settings; fill in before running.
host, port = "", 5432
database = ""
username = ""
password = ""


def _spotify_get(url, headers, params=None, max_attempts=3):
    """GET a Spotify Web API URL and return the decoded JSON body.

    Retries when the API answers 429 (sleeping for the number of seconds in
    the ``Retry-After`` header) or 401 (requesting a fresh access token).

    Args:
        url: Full endpoint URL.
        headers: Authorization headers. Updated in place when the token is
            refreshed so the caller keeps using the new token.
        params: Optional query-string parameters.
        max_attempts: Maximum number of requests to send.

    Returns:
        The parsed JSON document, or None if no attempt returned HTTP 200.
    """
    for _ in range(max_attempts):
        r = requests.get(url, params=params, headers=headers)
        if r.status_code == 200:
            return json.loads(r.text)

        logging.error(r.text)

        if r.status_code == 429:
            # r.headers is already a mapping; it must not be JSON-decoded.
            time.sleep(int(r.headers.get('Retry-After', 1)))
        elif r.status_code == 401:
            # The access token expired: fetch a new one and retry.
            headers.update(get_headers(client_id, client_secret))
        else:
            return None

    return None


def main():
    """Load artists and their genres from the Spotify API into PostgreSQL.

    Steps:
      1. Connect to the database (exits with status 1 on failure).
      2. Create the ``artists`` table if needed and upsert one row per name
         listed in ``data/csv/artist_list.csv`` whose top search hit has
         exactly the same name.
      3. Create the ``artist_genre`` table if needed and upsert the genres
         returned for the stored artist ids, requested 50 ids at a time.

    Always ends by calling ``sys.exit``: 0 after a completed run, 1 when the
    database connection cannot be established.
    """
    try:
        conn = psycopg2.connect(
            host=host,
            port=port,
            database=database,
            user=username,
            password=password)
        cursor = conn.cursor()
    except psycopg2.Error:
        logging.exception("could not connect to rds")
        sys.exit(1)

    try:
        headers = get_headers(client_id, client_secret)

        # IF NOT EXISTS keeps the script re-runnable against an existing DB.
        cursor.execute("""CREATE TABLE IF NOT EXISTS artists (
            id VARCHAR PRIMARY KEY NOT NULL,
            name VARCHAR,
            followers INT,
            popularity INT,
            url VARCHAR,
            image_url VARCHAR)
        """)
        conn.commit()

        # Spotify Search API: one lookup per artist name in the CSV.
        with open('data/csv/artist_list.csv', encoding='utf-8', newline='') as f:
            names = [row[0] for row in csv.reader(f) if row]

        for a in names:
            params = {
                "q": a,
                "type": "artist",
                "limit": "1"
            }
            raw = _spotify_get("https://api.spotify.com/v1/search", headers, params=params)
            if raw is None:
                continue

            try:
                artist_raw = raw['artists']['items'][0]
                if a != artist_raw['name']:
                    print('discordance')
                    continue
                artist = {
                    'id': artist_raw['id'],
                    'name': artist_raw['name'],
                    'followers': artist_raw['followers']['total'],
                    'popularity': artist_raw['popularity'],
                    'url': artist_raw['external_urls']['spotify'],
                    'image_url': artist_raw['images'][0]['url']
                }
            except (KeyError, IndexError, TypeError):
                # No search hit, or the hit lacks a field (e.g. no images).
                logging.error('something worng')
                continue

            try:
                insert_row(cursor, artist, 'artists')
                # Commit per row so a later rollback cannot discard it.
                conn.commit()
            except psycopg2.Error:
                # A failed statement aborts the open transaction; roll back
                # so the following inserts can still succeed.
                conn.rollback()
                logging.exception('could not insert artist')

        # NOTE(review): artist_id is the sole primary key and insert_row
        # upserts on the first column, so only the last genre inserted for
        # each artist is kept. Confirm whether (artist_id, genre) was meant.
        cursor.execute("""CREATE TABLE IF NOT EXISTS artist_genre (
            artist_id VARCHAR PRIMARY KEY NOT NULL,
            genre VARCHAR,
            FOREIGN KEY(artist_id) REFERENCES artists(id))
        """)
        conn.commit()

        cursor.execute("SELECT id FROM artists")
        artist_ids = [row[0] for row in cursor.fetchall()]

        artist_genres = []
        # The ids are requested in batches of 50 per call.
        for start in range(0, len(artist_ids), 50):
            ids = ','.join(artist_ids[start:start + 50])
            URL = "https://api.spotify.com/v1/artists/?ids={}".format(ids)

            raw = _spotify_get(URL, headers)
            if raw is None:
                continue

            for artist in raw.get('artists') or []:
                if not artist:
                    continue
                for genre in artist.get('genres', []):
                    artist_genres.append(
                        {
                            'artist_id': artist['id'],
                            'genre': genre
                        }
                    )

        for data in artist_genres:
            insert_row(cursor, data, 'artist_genre')

        conn.commit()
    finally:
        cursor.close()
        conn.close()

    sys.exit(0)



def get_headers(client_id, client_secret):
    """Obtain an access token and return Spotify API request headers.

    Posts a ``client_credentials`` grant to the Spotify accounts endpoint,
    authenticating with HTTP Basic credentials built from the client id and
    secret.

    Args:
        client_id: Spotify application client id.
        client_secret: Spotify application client secret.

    Returns:
        A dict holding the ``Authorization: Bearer <token>`` header.

    Raises:
        requests.HTTPError: If the token endpoint answers with an error
            status (for example when the credentials are rejected).
    """
    endpoint = "https://accounts.spotify.com/api/token"
    credentials = "{}:{}".format(client_id, client_secret).encode('utf-8')
    encoded = base64.b64encode(credentials).decode('ascii')

    r = requests.post(
        endpoint,
        data={"grant_type": "client_credentials"},
        headers={"Authorization": "Basic {}".format(encoded)},
    )
    # Surface the HTTP failure itself rather than a KeyError on
    # 'access_token' when the response carries an error payload.
    r.raise_for_status()

    access_token = json.loads(r.text)['access_token']

    return {"Authorization": "Bearer {}".format(access_token)}


def insert_row(cursor, data, table):
    """Upsert one row into ``table`` through ``cursor``.

    Builds an ``INSERT ... ON CONFLICT (...) DO UPDATE SET ...`` statement
    from the keys of ``data``. The first key is the conflict target, and on
    conflict every column (including that one) is set to the supplied value,
    so the values are passed twice: once for VALUES and once for SET.

    Args:
        cursor: Object exposing ``execute(sql, params)``.
        data: Mapping of column name to value; its first key must identify
            the conflict column.
        table: Name of the target table.
    """
    column_names = list(data)
    conflict_column = column_names[0]

    column_list = ', '.join(column_names)
    value_slots = ', '.join('%s' for _ in column_names)
    assignments = ', '.join(name + '=%s' for name in column_names)

    statement = (
        "INSERT INTO {} ( {} ) VALUES ( {} ) ON CONFLICT ( {} ) DO UPDATE SET  {}"
    ).format(table, column_list, value_slots, conflict_column, assignments)

    row_values = list(data.values())
    cursor.execute(statement, row_values + row_values)




if __name__=='__main__':
    main()

artists

name followers popularity
2Pac 11320693 81
50 Cent 7820442 83
A Thousand Horses 69243 45
ABBA 6595341 83
ABC 276038 54
Aerosmith 10977177 78
Agnetha Fältskog 50533 48
Alan Jackson 1690446 75
Albert King 347862 53
Alice Cooper 2709110 66

artist_genre

artist_id genre
3Nrfpe0tUJi4K4DXYWgMUX k-pop boy group
1ZwdS5xdxEREPySFridCfh west coast rap
3q7HBObVc0L8jNeTe5Gofh rap
55RI2GNCfyXr0f14uIdhwd modern country rock
0LcJLqbBmaGUft1e9Mm8HV swedish pop
2s79xe5F6eUQkjwjww27Fh synthpop
7Ey4PD4MYsKc5I2dolUwbH rock
7fUtt9kVZOyn9LWy0JbDRI new wave pop
4mxWe1mtYIYfP040G38yvS country road
5aygfDCEaX5KTZOxSCpT9o traditional blues
0%