|
|
Ligne 16 : |
Ligne 16 : |
| Ces informations ont pu être récupérées en utilisant l'API de la Wayback Machine pour retrouver les différentes pages dans ses archives, puis en analysant la page pour stocker ces données dans une nouvelle base de données. | | Ces informations ont pu être récupérées en utilisant l'API de la Wayback Machine pour retrouver les différentes pages dans ses archives, puis en analysant la page pour stocker ces données dans une nouvelle base de données. |
|
| |
|
| Le code utilisé est un script PHP (sale), utilisé à côté d'un dossier swf contenant les animations à chercher et une base de données MySQL allumée.
| |
|
| |
| <pre>
| |
<?php
/*
 * Recovers Noelswf metadata (title, views text, author) from the Wayback Machine.
 *
 * For every non-empty file found in $dir, the file name without its extension
 * is used as the animation id. The script asks the Wayback Machine availability
 * API for the closest archived copy of "noelswf.info/<id>.html", parses that
 * page with DOM/XPath, and inserts one row into the `swf` table. Anomalies
 * (no snapshot, no title, suspicious values) are appended to $logFile.
 */

set_time_limit(0);
//header("Content-Type: text/plain");

$logFile = "log_script_noelswf.log";
$dir = "../swf";
// Character list passed to trim() everywhere below.
// NOTE(review): the last character is reproduced as it appears in the original
// listing; if it was meant to be a non-breaking space, confirm the exact bytes.
$trimChars = " \t\n\r\0\x0B ";

/**
 * Trims $text, keeps only its first line, then trims that line again.
 *
 * @param mixed  $text      Raw node text (cast to string; substr() may yield false).
 * @param string $trimChars Character list handed to trim().
 * @return string
 */
function noelswf_first_line($text, $trimChars)
{
    $lines = explode("\n", trim((string) $text, $trimChars));

    return trim($lines[0], $trimChars);
}

/**
 * Appends one line to the anomaly log.
 *
 * @param string $logFile Path of the log file.
 * @param string $message Line to append (newline added here).
 */
function noelswf_log($logFile, $message)
{
    file_put_contents($logFile, $message . "\n", FILE_APPEND);
}

/**
 * Reports one scraped field and returns its SQL literal.
 *
 * A missing value (null, false or empty string) yields the bare keyword NULL.
 * Any other value, including "0", is echoed, logged as suspicious when it
 * contains a newline or is longer than 40 bytes, and returned escaped and
 * single-quoted.
 *
 * @param mysqli $conn    Open connection, used for escaping.
 * @param mixed  $value   Scraped value.
 * @param string $label   Label used in the console output ("Titre", ...).
 * @param string $tag     Label used in the log file ("titre", ...).
 * @param string $file    Animation id, written to the log.
 * @param string $logFile Path of the log file.
 * @return string SQL literal: NULL or a quoted, escaped string.
 */
function noelswf_sql_field($conn, $value, $label, $tag, $file, $logFile)
{
    if ($value === null || $value === false || $value === '') {
        return "NULL";
    }

    echo $label . " : '" . $value . "'\n";
    if (substr_count($value, "\n") || strlen($value) > 40) {
        noelswf_log($logFile, $tag . " chelou : " . $file);
    }

    return "'" . $conn->real_escape_string($value) . "'";
}

// Start every run with a fresh log.
if (file_exists($logFile)) {
    unlink($logFile);
}

$entries = scandir($dir);
if ($entries === false) {
    die("Cannot read directory: " . $dir . "\n");
}
// Drop the "." and ".." entries explicitly instead of relying on sort order.
$entries = array_diff($entries, array('.', '..'));
natsort($entries);

$servername = "localhost";
$username = "root";
$password = "";
$dbname = "noelswf";
$conn = new mysqli($servername, $username, $password, $dbname);
// Check the connection before using it for anything else.
if ($conn->connect_error) {
    die("Connection failed: " . $conn->connect_error);
}
$conn->set_charset("utf8");

// Archived pages are rarely valid HTML; keep libxml warnings out of the output.
libxml_use_internal_errors(true);

foreach ($entries as $complete_file) {
    echo "-------\n";
    $file = pathinfo($complete_file, PATHINFO_FILENAME);
    echo "Swf n°" . $file . "\n";

    if (!(filesize($dir . "/" . $complete_file) > 0)) {
        echo "Fichier vide\n";
        continue;
    }

    // --- 1. Ask the Wayback Machine for the closest snapshot (3 attempts max).
    // The separator must be "&timestamp="; the original listing showed it as
    // "×tamp=" because "&times" had been decoded as an HTML entity.
    $apiUrl = 'http://archive.org/wayback/available?url=' . "noelswf.info/$file.html" . '&timestamp=20150303';
    $obj = null;
    for ($i = 1; $i <= 3; $i++) {
        if ($i > 1) {
            sleep(1);
        }
        echo "Essai $i, Url : " . $apiUrl . "\n";

        // Reset on every attempt so that a failed request can never reuse the
        // response obtained for a previous attempt or a previous file.
        $obj = null;
        $json = file_get_contents($apiUrl);
        if ($json === false) {
            echo "La requete de page a echoué (erreur de connexion)\n";
        } else {
            $obj = json_decode($json);
            if ($obj === null) {
                echo "Json decode a fail\n";
            }
        }

        if (isset($obj->archived_snapshots->closest)) {
            break;
        }
    }

    // --- 2. Scrape the archived page.
    $title = null;
    $infos = null;
    $auteur = null;

    if (isset($obj->archived_snapshots->closest)) {
        $dom = new DOMDocument();
        $dom->loadHTMLFile($obj->archived_snapshots->closest->url);
        libxml_clear_errors();
        $xpath = new DOMXPath($dom);

        // True once the page turned out not to use the most recent layout;
        // views and author are only looked up in the most recent layout.
        $old = false;

        echo "Essai derniere version ->";
        $results = $xpath->query("//*[@class='each_f_content_header_title']");
        if ($results->length > 0) {
            echo " ok\n";
            $title = noelswf_first_line($results->item(0)->nodeValue, $trimChars);
        } else {
            echo " echec\n";
            echo "Essai version precedente ->";
            $old = true;
            $results = $xpath->query("//*[@class='module']");
            if ($results->length > 0) {
                echo " ok\n";
                // Keep only the text that precedes the toolbar labels.
                $parts = explode("Télécharger Partager Plein écran", $results->item(0)->nodeValue);
                $title = noelswf_first_line($parts[0], $trimChars);
            } else {
                echo " echec\n";
                echo "Essai version encore plus vieille ->";
                $node = $dom->getElementById("title_page");
                if ($node !== null) {
                    echo " normalement ok\n";
                    // Strip the first 12 bytes and the last byte around the title.
                    $title = substr($node->nodeValue, 12);
                    $title = substr($title, 0, -1);
                    $title = noelswf_first_line($title, $trimChars);
                } else {
                    echo " echec\n";
                }
            }
        }

        if ($old == false) {
            $headerInfos = 'each_f_content_header_infos';

            $results = $xpath->query("//*[@class='" . $headerInfos . "']");
            if ($results->length > 0) {
                // Drop the last 5 bytes of the header text.
                $infos = substr($results->item(0)->nodeValue, 0, -5);
                if (strpos($infos, ",") !== false && strpos($infos, "par") !== false) {
                    $parts = explode(",", $infos);
                    $infos = $parts[1];
                }
                $infos = trim((string) $infos, $trimChars);
            }

            // The author is the first <a> child, or failing that the first <span> child.
            foreach (array('a', 'span') as $child) {
                $nodes = $xpath->query("//*[@class='" . $headerInfos . "']/" . $child);
                $candidate = $nodes->length > 0 ? $nodes->item(0)->nodeValue : '';
                if ($candidate !== '') {
                    $auteur = $candidate;
                    break;
                }
            }
        }
    } else {
        echo "Reponse vide de Wayback Machine\n";
        noelswf_log($logFile, "aucune reponse : " . $file);
    }

    // --- 3. Store the row; missing fields are stored as NULL.
    $titleSql = noelswf_sql_field($conn, $title, "Titre", "titre", $file, $logFile);
    if ($titleSql === "NULL") {
        echo "ERROR - NO TITLE FOUND\n";
        noelswf_log($logFile, "no title : " . $file);
    }
    $infosSql = noelswf_sql_field($conn, $infos, "Vues", "vues", $file, $logFile);
    $auteurSql = noelswf_sql_field($conn, $auteur, "Auteur", "auteur", $file, $logFile);

    // The id comes from a file name, so it is escaped like the scraped values.
    $sql = "INSERT INTO `swf` (`id`, `titre`, `vues`, `auteur`) VALUES ('"
        . $conn->real_escape_string($file) . "',"
        . $titleSql . "," . $infosSql . "," . $auteurSql . ");";

    if (!$conn->query($sql)) {
        echo "Oh no! The query failed. ";
        echo "Sorry, the website is experiencing problems.";
        echo "Error: Our query failed to execute and here is why: \n";
        echo "Query: " . $sql . "\n";
        echo "Errno: " . $conn->errno . "\n";
        echo "Error: " . $conn->error . "\n";
    }
}

$conn->close();

?>
| |
|
| |
| </pre>
| |
| == Liens externes == | | == Liens externes == |
|
| |
|
| *[http://noelswf.jvflux.com Archive de Noelswf par JvFlux] | | *[http://noelswf.jvflux.com Archive de Noelswf par JvFlux] |
| *[http://noelswf.info <s>Adresse originale de Noelswf (lien mort)</s>] | | *[http://noelswf.info <s>Adresse originale de Noelswf (lien mort)</s>] |
| | *[https://archive.org/details/backup_noelswf Archive publique des fichiers Noelswf] |
| | |
|
| |
|
| [[Catégorie: Site communautaire des forums]] | | [[Catégorie: Site communautaire des forums]] |