Bonjour,
J’ai créé un petit script pour requêter le karma.
Ça donne ça:
import csv
import json
import os
import sys
import time
from datetime import datetime

import lxml
import lxml.html
import requests
from rich.progress import Progress
# Numeric id of the member whose messages are scanned; it is substituted into
# the listing URL and also appears in the name of the generated CSV file.
ID_MEMBRE = 502
# NOTE(review): not referenced by the visible code (pagination simply stops on
# the first response that is neither OK nor 429) — confirm it is still needed.
NB_PAGES = 157
# Paginated listing of the messages posted by one member.
URL_LIST_MESSAGES_FORMAT = "https://zestedesavoir.com/forums/messages/{id_membre}/?page={page}"
# API endpoint queried for the karma of a single message.
URL_KARMAT_FORMAT = "https://zestedesavoir.com/api/forums/message/{id_message}/karma/"
"""
To get a token:
$ ZDS_CRED=$(echo -n "$CLIENT_ID:$CLIENT_SECRET" | base64)
$ curl -X POST \
-H "Authorization: Basic $ZDS_CRED" \
-H "Cache-Control: no-cache" \
-H "Content-Type: application/x-www-form-urlencoded" \
"https://zestedesavoir.com/oauth2/token/" \
-d 'grant_type=client_credentials'
But the API is down so we use an already authenticated session.
"""
# Cookies of an already authenticated browser session, sent with every request.
# Each value can be overridden through the ZDS_CSRFTOKEN / ZDS_SESSIONID
# environment variables so the script can run under another account without
# editing the file; the literals below are only fallbacks.
# NOTE(review): the fallback values look like real session secrets stored in
# plain text — consider revoking them and dropping the defaults.
AUTH_DATA = {
    "csrftoken": os.environ.get(
        "ZDS_CSRFTOKEN",
        "CpPrWfeRJheLyQTojPLtbOSFHxNngHJAkUwWDFaGjuhrAFLDAQRblLQOIRtfxMIe",
    ),
    "sessionid": os.environ.get(
        "ZDS_SESSIONID",
        "oyOXhkXnTSCXpfwNoaSoJVqDSIpUzCwO",
    ),
}
def getKarma(s, post_id):
    """Fetch the likes and dislikes recorded for one forum message.

    Args:
        s: HTTP session exposing a ``requests``-style ``get`` method.
        post_id: Identifier of the message; substituted into
            ``URL_KARMAT_FORMAT`` and copied into every returned row.

    Returns:
        A ``(status_code, rows)`` tuple. ``rows`` contains one dict per vote
        with the keys ``id``, ``type``, ``user_id`` and ``username``. It is
        empty when the response is not OK, in which case the response is also
        printed to stderr.
    """
    link = URL_KARMAT_FORMAT.format(id_message=post_id)
    resp = s.get(link, headers={'accept': 'application/json'}, cookies=AUTH_DATA)
    data = []
    if not resp.ok:
        print(resp, file=sys.stderr)
        return resp.status_code, data

    karma_data = resp.json()
    # Likes first, then dislikes; within each, named voters precede blank rows.
    for vote_type in ('like', 'dislike'):
        votes = karma_data[vote_type]
        users = votes['users']
        data.extend(
            {"id": post_id, 'type': vote_type, 'user_id': user['id'], 'username': user['username']}
            for user in users
        )
        # The reported count can exceed the number of listed users; pad with
        # blank rows so the number of rows per type still equals the count.
        unnamed = votes['count'] - len(users)
        data.extend(
            {"id": post_id, 'type': vote_type, 'user_id': '', 'username': ''}
            for _ in range(unnamed)
        )
    return resp.status_code, data
# Pause applied after an HTTP 429 before the same request is retried.
RATE_LIMIT_WAIT_SECONDS = 60


def _collect_post_ids(session):
    """Walk the member's message listing and return every message id found."""
    post_ids = []
    page = 1
    while True:
        link = URL_LIST_MESSAGES_FORMAT.format(page=page, id_membre=ID_MEMBRE)
        resp = session.get(link, cookies=AUTH_DATA)
        print(f"Processing page n°{page}")
        if resp.status_code == 429:
            # Rate limited: wait, then retry the same page.
            print(f"Too many requests waiting {RATE_LIMIT_WAIT_SECONDS}s\n")
            time.sleep(RATE_LIMIT_WAIT_SECONDS)
            continue
        if not resp.ok:
            # Any other failure ends the pagination.
            break
        html = lxml.html.fromstring(resp.content)
        for anchor in html.cssselect(".forum-entry-title a"):
            # The id is whatever follows the last "#p" of the link; links
            # without that marker carry no id and are skipped.
            _, marker, post_id = anchor.attrib['href'].rpartition('#p')
            if marker:
                post_ids.append(post_id)
        page += 1
    return post_ids


def _collect_karma(session, post_ids):
    """Query the karma of each message and return the accumulated vote rows."""
    rows = []
    with Progress() as progress:
        karmaCount = progress.add_task("[red]Counting...", total=len(post_ids))
        for post_id in post_ids:
            ret, data_post = getKarma(session, post_id)
            while ret == 429:
                time.sleep(RATE_LIMIT_WAIT_SECONDS)
                ret, data_post = getKarma(session, post_id)
            if 200 <= ret < 300:
                rows.extend(data_post)
            # Advance after the message is handled so the bar reaches its total.
            progress.update(karmaCount, advance=1)
    return rows


def _write_csv(rows):
    """Write the vote rows to a dated CSV file named after the member id."""
    filename = f'karma_{ID_MEMBRE}_{datetime.today().strftime("%Y-%m-%d")}.csv'
    # Explicit UTF-8 so usernames with non-ASCII characters are written the
    # same way regardless of the platform's default encoding.
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['id', 'type', 'user_id', 'username'])
        writer.writeheader()
        writer.writerows(rows)


if __name__ == "__main__":
    # The context manager closes the session's connections on exit.
    with requests.Session() as s:
        list_post_id = _collect_post_ids(s)
        data = _collect_karma(s, list_post_id)
    _write_csv(data)
Bon, j’ai essayé d’utiliser l’API membre mais j’obtiens une 500 uniquement quand je mets mes identifiants correctement.
Je pense que ça vient du fait que je suis le seul à utiliser `client_credentials` comme `grant_type`.
Comme ça ne marchait pas, j’ai utilisé une session déjà authentifiée car sinon la limite du nombre de requêtes autorisées est beaucoup trop petite. Chez les autres membres, ça devrait marcher avec l’authentification basique.
Bref, ça donne un fichier CSV que je requête comme ceci:
$ sqlite3 -cmd ".mode csv" \
".import karma_502_2023-04-27.csv karma" \
"SELECT username, (SELECT COUNT(*) from karma where username = k.username and type='like') as nbLike from (SELECT DISTINCT username from karma) k ORDER BY nbLike DESC LIMIT 11;"
Ce qui donne (avec l’option `-markdown`) :
| username | nbLike |
|---|---|
| qwerty | 177 |
| Amaury | 144 |
|  | 130 |
| sgble | 86 |
| Gabbro | 85 |
| SpaceFox | 84 |
| Aabu | 78 |
| Taurre | 65 |
| Gil Cot | 62 |
| informaticienzero | 58 |
| Blackline | 48 |
On n’est clairement pas sur du chef-d’œuvre, c’est un script rapide pour faire ce que je voulais faire.