Large scale LoL match data collection without production API Key

Pick and Ban phase는 사람이 진행해야 공평하다. 체스 랭크 게임을 AI가 플레이하는 것은 핵이나 다름없지 않겠는가?

GameQuant의 개발을 위해서 라이엇에서 정식으로 API key를 발급받고 테라바이트 단위의 콜을 하기는 쉽지 않을 것이다. 우리에게 주어진 것은 Call rate limit이 뚜렷하게 걸려 있는 personal key들뿐이다.

허락을 받기보단 용서를 빌라 했던가, 일단은 데이터를 모아 보았다.

  1. 라이엇 계정 생성 매크로를 사용하여 500개가 넘는 계정을 자동으로 생성하였다. 이메일 인증이 되지 않은 계정은 API 키를 발급받을 수 없는 관계로, fake email generator를 사용하여 생성된 이메일이 사라지기 전에 가입 후 인증 절차까지 전부 자동화하였다.

  2. API 키는 24시간에 한 번씩 직접 클릭해 리셋시켜 줘야만 그 access가 유지된다. 이 또한 selenium을 통해 해결했다. 물론 라이엇 측에서도 몇 가지 방어 체계를 구축해 두었지만, anti captcha 앞에서는 힘없이 무너져 버렸다. 코드는 일부만 공개하겠다.

     from selenium import webdriver
     from selenium.webdriver.common.by import By
     from selenium.webdriver.support.ui import WebDriverWait
     from selenium.webdriver.chrome.service import Service
     from anticaptchaofficial.recaptchav2proxyless import *
     import threading
    
     # Wall-clock start; the elapsed time is printed once every thread has joined.
     # NOTE(review): `time` is not imported by name in this listing — presumably it
     # arrives through the star import above or the import was dropped when the
     # listing was trimmed; confirm.
     t0 = time.time()
     # Number of consecutive rows of `data` handled by one worker thread.
     allocation = 15
     # Each line is later split on ',' into an id (field 0) and a password (field 1).
     with open('/home/ubuntu/gamequant/24.csv','r') as f:
         data = f.readlines()
    
    
     # Worker run by each thread. For every row in data[start:start + allocation]
     # it drives one headless Chrome session through a login form, submits a second
     # form, reads the value of the element with id "apikey" and appends
     # "<key>,<row>" to api.csv.
     #
     # start -- index into the module-level `data` list of the first row to process.
     #
     # Returns nothing. A failure on one row is printed and the loop moves on.
     # NOTE(review): this listing is incomplete — the author removed a section in
     # the middle (see the placeholder below), so it does not run as shown.
     def collect(start):
         global allocation
         options = webdriver.ChromeOptions()
         options.add_argument("--headless")
         options.add_argument("--no-sandbox")
         options.add_argument("--disable-gpu")
    
         s = Service('/home/ubuntu/gamequant/chromedriver')
         # One browser instance is shared by all rows of this worker's slice; the
         # visible code never calls quit() on it.
         browser = webdriver.Chrome(service=s, options=options)
    
         # 'domain' is a literal placeholder here — presumably the real URL was
         # withheld by the author; confirm.
         browser.get('domain')
         # Clamp the slice end to the number of available rows.
         end = start + allocation
         if end >= len(data):
             end = len(data)
         for k, i in enumerate(data[start:end]):
             try:
                 # Drop the final character of the line (the newline kept by readlines()).
                 i = i[:-1]
                 # filling form
                 id = i.split(',')[0]
                 password = i.split(',')[1]
                 # Wait up to 120 s for the navbar link, then click every match.
                 WebDriverWait(browser, 120).until(lambda x: x.find_element(By.XPATH,
                                                                            '//*[@id="site-navbar-collapse"]/ul[2]/li/a'))
                 buttons = browser.find_elements(By.XPATH, '//*[@id="site-navbar-collapse"]/ul[2]/li/a')
                 for button in buttons:
                     button.click()
    
                 # Wait for the first input field, then type the id and password
                 # into the two inputs located by absolute XPath.
                 WebDriverWait(browser,120).until(lambda x: x.find_element(By.XPATH, '/html/body/div/div/div/div[2]/div/div/div[2]/div/div/div/div[1]/div/input'))
                 browser.find_element(By.XPATH, '/html/body/div/div/div/div[2]/div/div/div[2]/div/div/div/div[1]/div/input').send_keys(id)
                 browser.find_element(By.XPATH, '/html/body/div/div/div/div[2]/div/div/div[2]/div/div/div/div[2]/div/input').send_keys(password)
    
                 # a = ActionChains(browser)
                 browser.find_element(By.XPATH, '/html/body/div/div/div/div[2]/div/div/button').click()
                 # WebDriverWait(browser, 10).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "iframe[name^='a-'][src^='https://www.google.com/recaptcha/api2/anchor?']")))
                 # WebDriverWait(browser, 10).until(EC.element_to_be_clickable((By.XPATH, "//span[@id='recaptcha-anchor']"))).click()
    
                 # The next line is the author's placeholder (Korean: "middle part of
                 # the code deleted"); it is not valid Python. `g_response`, used
                 # below, is not defined anywhere in the visible code.
                //중간 부분 코드 삭제
                 # Write g_response into the element with id "g-recaptcha-response"
                 # and click the form's input button.
                 browser.execute_script(f'document.getElementById("g-recaptcha-response").innerHTML="{g_response}";')
                 browser.find_element(By.XPATH, '/html/body/div[2]/div/form/div[3]/div/div[3]/div[2]/div[2]/input').click()
    
                 new_api_key = browser.find_element(By.ID, 'apikey').get_attribute('value')
                 # 'logout' is another literal placeholder for a withheld URL — confirm.
                 browser.get('logout')
    
    
                 print(new_api_key)
                 # print(LolWatcher(new_api_key).summoner.by_name("kr", 'username')["puuid"])
                 # Append "<key>,<id>,<password>" as one row; all worker threads
                 # append to this same file and no lock is visible in this listing.
                 with open('/home/ubuntu/gamequant/api.csv', 'a') as f:
                     f.writelines(new_api_key+','+i+'\n')
             except Exception as E:
                 # Any exception for this row is reported and the loop continues.
                 print('##########Error with', start, k, i, E)
    
     # Thread objects started by the launcher below.
     Pros = []
     if __name__ == "__main__":
         # change the value inside the range
         # n worker threads; thread i starts at row i*allocation, so the slices of
         # `data` handled by different threads do not overlap.
         n = 4
         for i in range(n):
             print("Thread Started")
             p = threading.Thread(target=collect, args=(i*allocation,))
             Pros.append(p)
             p.start()
    
         # Wait for every worker, then print the elapsed seconds since t0.
         for t in Pros:
             t.join()
         print(time.time()-t0)
    
  3. 수백 개의 계정을 동원해 24시간에 한 번씩 리셋시키며, AWS EC2 최강의 인스턴스들을 돌려 가며 멀티스레딩으로 데이터를 모았다. 코드는 악용의 여지가 있으니 역시 아래에 일부만 공개한다.

     from riotwatcher import LolWatcher
     import threading
     import time
     from selenium import webdriver
     from selenium.webdriver.common.by import By
     from selenium.webdriver.support.ui import WebDriverWait
     from selenium.webdriver.chrome.service import Service
     from anticaptchaofficial.recaptchav2proxyless import *
    
     ###### DEALING WITH ERROR ######
     # If there are too many 'error_puuid' printed, try one of these:
     # (1) change match to match_v5 in when calling api
     # (2) change match_v5 to match in when calling api
     # (3) check whether the apis are valid uncommenting the section 'error_puuid(3)'
    
    
     # to measure the time taken (comparing effectiveness of threading)
     t0 = time.time()
    
     # Initial settings
    
     # Number of API keys assigned to each worker thread (the launcher starts
     # len(apis) // num_api_key workers).
     num_api_key = 15
     # Number of lines of `names` assigned to each worker thread.
     num_summoner = 9573
     # reading files
     with open('/home/ubuntu/gamequant/api.csv', 'r') as f:
         data = f.readlines()
     # Prints every raw line of api.csv to stdout.
     print(data)
    
     # apis[j] is the first comma-separated field of row j; idpw[j] is the list of
     # the remaining fields. The last field keeps its trailing newline, which
     # callers strip with pwd[:-1].
     apis = []
     idpw = []
    
     for i in data:
         i = i.split(',')
         idpw.append(i[1:])
         apis.append(i[0])
    
     # One line per entry; each worker strips the last character before using it
     # as a summoner name.
     with open("/home/ubuntu/gamequant/summoner_processed.csv") as f:
         names = f.readlines()
    
    
    
     #API Collector
     # Opens a new headless Chrome session on developer.riotgames.com, logs in with
     # the given credentials, submits a form and returns the value of the element
     # with id "apikey".
     #
     # id       -- text typed into the first login input.
     # password -- text typed into the second login input.
     #
     # Returns the key string on success. If any step raises, the exception is
     # printed and the function falls through, returning None implicitly.
     # NOTE(review): this listing is incomplete — the author removed a section in
     # the middle (see the placeholder below), so it does not run as shown.
     def api_collect(id, password):
         # NOTE(review): `allocation` is neither defined in this listing nor used
         # in this function.
         global allocation
         options = webdriver.ChromeOptions()
         options.add_argument("--headless")
         options.add_argument("--no-sandbox")
         options.add_argument("--disable-gpu")
    
         s = Service('/home/ubuntu/gamequant/chromedriver')
         # A fresh browser is started on every call; the visible code never calls
         # quit() on it.
         browser = webdriver.Chrome(service=s, options=options)
    
         browser.get('https://developer.riotgames.com')
    
    
         try:
             # Wait up to 120 s for the navbar link, then click every match.
             WebDriverWait(browser, 120).until(lambda x: x.find_element(By.XPATH,
                                                                        '//*[@id="site-navbar-collapse"]/ul[2]/li/a'))
             buttons = browser.find_elements(By.XPATH, '//*[@id="site-navbar-collapse"]/ul[2]/li/a')
             for button in buttons:
                 button.click()
    
             # Wait for the first input field, then type the id and password into
             # the two inputs located by absolute XPath.
             WebDriverWait(browser,120).until(lambda x: x.find_element(By.XPATH, '/html/body/div/div/div/div[2]/div/div/div[2]/div/div/div/div[1]/div/input'))
             browser.find_element(By.XPATH, '/html/body/div/div/div/div[2]/div/div/div[2]/div/div/div/div[1]/div/input').send_keys(id)
             browser.find_element(By.XPATH, '/html/body/div/div/div/div[2]/div/div/div[2]/div/div/div/div[2]/div/input').send_keys(password)
    
             # a = ActionChains(browser)
             browser.find_element(By.XPATH, '/html/body/div/div/div/div[2]/div/div/button').click()
             # WebDriverWait(browser, 10).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "iframe[name^='a-'][src^='https://www.google.com/recaptcha/api2/anchor?']")))
             # WebDriverWait(browser, 10).until(EC.element_to_be_clickable((By.XPATH, "//span[@id='recaptcha-anchor']"))).click()
    
             # token = browser.find_element(By.XPATH, '/html/body/input').get_attribute('value')
             # print(token)
             # wait for "solved" selector to come up
             # Initial value; presumably overwritten by the removed section below.
             g_response = 0
             # The next line is the author's placeholder (Korean: "middle part of
             # the code deleted"); it is not valid Python.
             //코드 중간부분 삭제
             # Write g_response into the element with id "g-recaptcha-response" and
             # click the form's input button.
             browser.execute_script(f'document.getElementById("g-recaptcha-response").innerHTML="{g_response}";')
             browser.find_element(By.XPATH, '/html/body/div[2]/div/form/div[3]/div/div[3]/div[2]/div[2]/input').click()
    
             new_api_key = browser.find_element(By.ID, 'apikey').get_attribute('value')
             browser.get('https://developer.riotgames.com/logout')
    
    
             print(new_api_key)
             return new_api_key
         except Exception as E:
             # Report the failure; the caller receives None.
             print('##########Error with', E)
    
    
     # Worker run by each thread. Walks the lines names[name_start:name_end] and,
     # for each one, looks up the account's puuid, builds a list of match ids (that
     # loop was removed from the listing by the author) and, for every match it
     # keeps, appends one summary row and one raw row to two CSV files.
     #
     # api_start  -- index into `apis` / `idpw` of the first key owned by this
     #               worker; it uses the num_api_key entries starting there.
     # name_start -- index of the first line of `names` to process.
     # name_end   -- index one past the last line; clamped to len(names).
     #
     # Returns nothing; output goes to the CSV files and to stdout.
     # NOTE(review): this listing is incomplete (redacted call arguments and a
     # removed loop body below), so it does not run as shown.
     def collect(api_start, name_start, name_end):
         global apis, names
         # Making Dictionary to put api_keys
         # Slots are keyed by the strings '0' .. str(num_api_key - 1).
         config={}
         for i in range(num_api_key):
             config[str(i)] =''
    
    
         # Setting initial values (api setting, summoner name setting)
         api_end = api_start + num_api_key
         for i, api in enumerate(apis[api_start:api_end]):
             try:
                 # Probe the stored key with one lookup; the call arguments were
                 # redacted by the author ("~~~~" is a placeholder).
                 Api = LolWatcher(api)
                 answer = Api.summoner.by_name(~~~~)
                 config[list(config.keys())[i]] = api
             except:
                 # Probe failed: request a new key with the credentials stored next
                 # to this key. pwd[:-1] drops the trailing newline kept from the CSV.
                 # api_collect returns None on failure, which is stored as-is.
                 print('API_ERROR',i,api)
                 print(idpw[i+api_start])
                 Id, pwd = idpw[i+api_start][0], idpw[i+api_start][1]
                 temp_api = api_collect(Id, pwd[:-1])
                 print(temp_api)
                 config[list(config.keys())[i]] = temp_api
    
         # Rotation state: `api` is the active client, `api_call` counts calls made
         # with it, `num` counts rotations (active slot is num % number of slots).
         api = LolWatcher(config['0'])
         num = 0
         api_call = 0
         thread_num = int(api_start/num_api_key)
         if name_end > len(names):
             name_end = len(names)
    
         # MAIN LOOP
         for k, i in enumerate(names[name_start:name_end]):
             # NOTE(review): x is reset to 0 for every summoner while `api` keeps
             # pointing at whichever slot was selected last, so the except branch
             # below may refresh slot 0 rather than the slot currently in use.
             x = 0
             # 23h stop
             # The bare string below is disabled code kept by the author; it is
             # evaluated as a string expression and has no effect.
             '''
             if time.time() - t1 > 43200:
                 while text != 'A':
                     t1 = time.time()
                     print(api_start / num_api_key, k, i)
                     text = input(f'{thread_num}th Thread 23h: Press "A" to continue\n')
                 text = 'B'
                 print(f'{thread_num}th 23h Thread started')
                 with open('api.csv') as f:
                     apis = f.readlines()
                 print(apis)
                 for i, api in enumerate(apis[api_start:api_end]):
                     try:
                         Api = LolWatcher(api)
                         answer = @@*!&#(*!*
                         config[list(config.keys())[i]] = api
                     except:
                         print('API_ERROR', i, api)
             '''
             # Drop the final character of the line (the newline kept by readlines()).
             summoner_name = i[:-1]
             # Key check: issue a lookup for a fixed literal name. Below 99 calls the
             # current client is used; at 99 the next slot is selected first. On any
             # exception a new key is requested for slot x, the next slot is
             # selected, and the check is retried.
             while True:
                 try:
                     if (api_call < 99):
                         answer = api.summoner.by_name("kr", '123123')["puuid"]
                         api_call += 1
                         break
                     elif (api_call >= 99):
                         num += 1
                         x = num % len(list(config.keys()))
                         api = LolWatcher(config[list(config.keys())[x]])
                         api_call = 0
                         answer = api.summoner.by_name("kr", '125152131')["puuid"]
                         print(f"{thread_num}, api_swap")
                         break
                 except:
                     Id, pwd = idpw[api_start + x][0], idpw[api_start + x][1]
                     temp_api = api_collect(Id, pwd[:-1])
                     config[list(config.keys())[x]] = temp_api
                     num += 1
                     x = num % len(list(config.keys()))
                     api = LolWatcher(config[list(config.keys())[x]])
                     api_call = 0
    
             try:
                 # Resolve the summoner name to a puuid, rotating to the next slot
                 # first when the current client has reached 99 calls.
                 if(api_call < 99):
                     summoner_id = api.summoner.by_name("kr", summoner_name)["puuid"]
                     print(summoner_id)
                     api_call += 1
                 elif(api_call >= 99):
                     num += 1
                     x = num % len(list(config.keys()))
                     api = LolWatcher(config[list(config.keys())[x]])
                     api_call = 0
                     summoner_id = api.summoner.by_name("kr", summoner_name)["puuid"]
                     api_call += 1
                     print(f"{thread_num}, api_swap")
    
                 start = 0
                 matchlist = []
                 # The body of this loop was removed by the author. The placeholder
                 # below is not valid Python; it reads (Korean): "deleted entirely;
                 # the number of API calls per account was limited to prevent errors".
                 while True:
                     if (api_call < 99):
                       /통째로 삭제, 한개의 아이디  API Call 횟수를 제한시켜 에러를 방지하였음.
                             break
                 print(len(matchlist))
    
             except:
                 # Any failure while resolving the summoner or listing matches skips
                 # this summoner.
                 print(f"error_puuid{thread_num}", i)
                 continue
    
    
             # Extracting matchinfo from the games in matchlist
             for gameId in matchlist:
                 # Retry loop for one match: left only by the `break` after both
                 # files have been written.
                 while(True):
                     try:
                         if(api_call < 99):
                             matchinfo = api.match_v5.by_id("ASIA", gameId)
                             api_call += 1
                         elif(api_call >= 99):
                             num += 1
                             x = num % len(list(config.keys()))
                             api = LolWatcher(config[list(config.keys())[x]])
                             api_call = 0
                             matchinfo = api.match_v5.by_id("ASIA", gameId)
                             api_call += 1
                             print(f"{thread_num}, api_swap")
    
    
                         # NOTE(review): this `continue` (and the one further down)
                         # targets the enclosing `while(True)`, not the `for gameId`
                         # loop, so the same gameId is fetched again rather than
                         # skipped.
                         if matchinfo["info"]["queueId"] != 420:
                             # print(matchinfo["info"]["queueId"])
                             # print(num)
                             continue
    
                         participants = matchinfo["info"]["participants"]
                         # print(participants[0]["teamId"])
                         # print(participants[0]["win"])
    
                         # Win is 1 when team 100 won, derived from the first
                         # participant's team and result; otherwise 0.
                         if (participants[0]["teamId"] == 100 and participants[0]["win"] == True) or (
                                 participants[0]["teamId"] != 100 and participants[0]["win"] != True):
                             Win = 1
                         else:
                             Win = 0
    
                         Blue = []
                         Red = []
                         # Ten ban slots, filled at index pickTurn - 1 from both
                         # teams' ban lists; unfilled slots stay 0.
                         Ban = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
                         K = matchinfo["info"]["teams"][0]["bans"]
                         # print(K)
                         # NOTE(review): the loops below reuse the name `i`, so the
                         # `i` printed in the final 'Success' line is no longer the
                         # summoner line of the outer loop.
                         for i in K:
                             Ban[i["pickTurn"] - 1] = i["championId"]
    
                         K = matchinfo["info"]["teams"][1]["bans"]
                         for i in K:
                             Ban[i["pickTurn"] - 1] = i["championId"]
    
                         # Split the ten participants' champion ids by team
                         # (100 -> Blue, 200 -> Red).
                         for i in range(10):
                             if participants[i]["teamId"] == 100:
                                 Blue.append(participants[i]["championId"])
                             elif participants[i]["teamId"] == 200:
                                 Red.append(participants[i]["championId"])
                             else:
                                 # NOTE(review): teamId is compared with ints above, so
                                 # this str + teamId concatenation would presumably
                                 # raise TypeError if reached — confirm.
                                 print("Error team Id does not exist: " + "teamId =" + participants[i]["teamId"])
    
                         # Matches whose first two Blue champions are identical are
                         # not written (see the note on `continue` above).
                         if Blue[0] == Blue[1]:
                             continue
    
                         # Summary row: [gameId],Ban,Blue,Red,[Win] using Python
                         # list repr for each field.
                         with open("/home/ubuntu/gamequant/match_data_ban.csv", "a") as f:
                             line = str([gameId]) + ","  + str(Ban) + "," + str(Blue) + "," + str(Red) + "," + str([Win]) + "\n"
                             f.write(line)
    
                         # Raw row: str() of the whole match dict, one per line.
                         with open("/home/ubuntu/gamequant/match_data_ban_total.csv", 'a') as f:
                             line = str(matchinfo) + '\n'
                             f.write(line)
    
                         # print(gameId)
                         # print(api_call)
                         break
                     except Exception as e:
                         # On any failure, run the same key check as at the top of
                         # the summoner loop, then the outer while retries this
                         # gameId.
                         print(f"error pass{thread_num}",num, e)
                         while True:
                             try:
                                 if (api_call < 99):
                                     answer = api.summoner.by_name("kr", '124124')["puuid"]
                                     api_call += 1
                                     break
                                 elif (api_call >= 99):
                                     num += 1
                                     x = num % len(list(config.keys()))
                                     api = LolWatcher(config[list(config.keys())[x]])
                                     api_call = 0
                                     answer = api.summoner.by_name("kr", '231312313')["puuid"]
    
                                     print(f"{thread_num}, api_swap")
                                     break
                             except:
                                 Id, pwd = idpw[api_start + x][0], idpw[api_start + x][1]
                                 temp_api = api_collect(Id, pwd[:-1])
                                 config[list(config.keys())[x]] = temp_api
                                 num += 1
                                 x = num % len(list(config.keys()))
                                 api = LolWatcher(config[list(config.keys())[x]])
                                 api_call = 0
    
             print('Success', thread_num, k, i)
         print(f'Finished {thread_num}')
    
    
     # Thread objects started by the launcher below.
     Pros = []
     if __name__ == "__main__":
         # change the value inside the range
         # One worker per full group of num_api_key keys; keys left over after the
         # integer division are not assigned to any worker.
         n = len(apis) // num_api_key
         for i in range(n):
             print("Thread Started")
             # Worker i gets keys [i*num_api_key, (i+1)*num_api_key) and names
             # [i*num_summoner, (i+1)*num_summoner).
             p = threading.Thread(target=collect, args=(i*num_api_key, i*num_summoner, (i+1)*num_summoner,))
             Pros.append(p)
             p.start()
    
         # Wait for every worker, then print the elapsed seconds since t0.
         for t in Pros:
             t.join()
         print(time.time()-t0)
    

    꽤 힘들었지만 AWS EC2, RunCommand 등을 사용하여 수십 개의 인스턴스들을 동시에 병렬로 다루며 large data pipeline 구축에 대해서 많은 것을 배웠다. 어떻게 이 데이터를 처리했으며 어떤 식으로 최종 상품을 만드는 데 사용하였는지 등은 블로깅하고 싶지 않으므로 비밀에 부쳐 두려 한다.