Refactor manga detail extraction logic

This commit is contained in:
xMohnad
2025-06-11 12:17:48 +03:00
parent a15544d891
commit fc3a6e3c68

View File

@@ -130,79 +130,75 @@ class MMRCMS extends MProvider {
@override @override
Future<MManga> getDetail(String url) async { Future<MManga> getDetail(String url) async {
final statusList = [
{
"complete": 1,
"complet": 1,
"completo": 1,
"zakończone": 1,
"concluído": 1,
"مكتملة": 1,
"ongoing": 0,
"en cours": 0,
"em lançamento": 0,
"prace w toku": 0,
"ativo": 0,
"مستمرة": 0,
"em andamento": 0,
},
];
MManga manga = MManga();
final res = (await client.get(Uri.parse(url))).body; final res = (await client.get(Uri.parse(url))).body;
final document = parseHtml(res);
final manga = MManga();
final author = xpath( // Title
res, final mangaTitle = document
'//*[@class="dl-horizontal"]/dt[contains(text(), "Auteur(s)") or contains(text(), "Author(s)") or contains(text(), "Autor(es)") or contains(text(), "Yazar(lar) or contains(text(), "Mangaka(lar)")]//following-sibling::dd[1]/text()', .selectFirst(".panel-heading, .listmanga-header, .widget-title")
); ?.text;
if (author.isNotEmpty) { manga.name = mangaTitle;
manga.author = author.first;
}
final status = xpath(
res,
'//*[@class="dl-horizontal"]/dt[contains(text(), "Statut") or contains(text(), "Status") or contains(text(), "Estado") or contains(text(), "Durum")]/following-sibling::dd[1]/text()',
);
if (status.isNotEmpty) {
manga.status = parseStatus(status.first, statusList);
}
final description = xpath( // Cover
res, manga.imageUrl = guessCover(
'//*[@class="well" or @class="manga well"]/p/text()', url,
); url: document.selectFirst(".row img.img-responsive")?.getSrc,
if (description.isNotEmpty) {
manga.description = description.first;
}
manga.genre = xpath(
res,
'//*[@class="dl-horizontal"]/dt[contains(text(), "Categories") or contains(text(), "Categorias") or contains(text(), "Categorías") or contains(text(), "Catégories") or contains(text(), "Kategoriler" or contains(text(), "Kategorie") or contains(text(), "Kategori") or contains(text(), "Tagi"))]/following-sibling::dd[1]/text()',
); );
var chapUrls = xpath(res, '//*[@class="chapter-title-rtl"]/a/@href'); // Description
var chaptersNames = xpath(res, '//*[@class="chapter-title-rtl"]/a/text()'); manga.description = extractDescription(document);
var chaptersDates = xpath(
res,
'//*[@class="date-chapter-title-rtl"]/text()',
);
var dateUploads = parseDates( document.select('.panel-body h3, .row .dl-horizontal dt').forEach((
chaptersDates, element,
source.dateFormat, ) {
source.dateFormatLocale, final label = _getOwnText(
); element,
).toLowerCase().replaceFirst(RegExp(r' :$'), '');
final valueElement = element.selectFirst('div.text');
if (valueElement.text == null)
final valueElement = element.nextElementSibling;
_assignMangaInfo(manga, label, valueElement);
});
// Chapters
List<MChapter>? chaptersList = []; List<MChapter>? chaptersList = [];
for (var i = 0; i < chaptersNames.length; i++) { for (var ch in document.select("ul.chapters > li:not(.btn)")) {
MChapter chapter = MChapter(); chaptersList.add(chapterFromElement(ch, mangaTitle));
chapter.name = chaptersNames[i];
chapter.url = chapUrls[i];
chapter.dateUpload = dateUploads[i];
chaptersList.add(chapter);
} }
manga.chapters = chaptersList; manga.chapters = chaptersList;
return manga; return manga;
} }
MChapter chapterFromElement(MElement element, String mangaTitle) {
final chapter = MChapter();
final titleWrapper = element.selectFirst(".chapter-title-rtl");
final anchor = titleWrapper?.selectFirst("a");
if (anchor != null) {
chapter.url = anchor.getHref ?? '';
chapter.name = cleanChapterName(titleWrapper.text, mangaTitle);
final dateElement = element.selectFirst(".date-chapter-title-rtl");
if (dateElement != null && dateElement.text.isNotEmpty) {
chapter.dateUpload = parseDates(
[dateElement.text],
source.dateFormat,
source.dateFormatLocale,
)[0];
} else {
chapter.dateUpload = DateTime.now().millisecondsSinceEpoch.toString();
}
}
return chapter;
}
@override @override
Future<List<String>> getPageList(String url) async { Future<List<String>> getPageList(String url) async {
final res = (await client.get(Uri.parse(url))).body; final res = (await client.get(Uri.parse(url))).body;
@@ -318,6 +314,141 @@ class MMRCMS extends MProvider {
return Uri.parse(source.baseUrl).resolve(url).toString(); return Uri.parse(source.baseUrl).resolve(url).toString();
} }
} }
String extractDescription(MDocument document) {
final container = document.selectFirst(".row .well");
if (container == null) return "";
String text = container.text;
container.select("h5").forEach((element) {
text = text.replaceAll(element.text, "");
});
return text.replaceAll(RegExp(r'\n{3,}'), '\n\n').trim();
}
String _getOwnText(MElement element) {
final text = element.text;
final childrenText = element.children.map((e) => e.text).join();
return text.replaceFirst(childrenText, '').trim();
}
void _assignMangaInfo(MManga manga, String label, MElement valueElement) {
if (_detailAuthor.contains(label)) {
manga.author = valueElement.text;
} else if (_detailArtist.contains(label)) {
manga.artist = valueElement.text;
} else if (_detailGenre.contains(label)) {
manga.genre = valueElement?.select("a").map((e) => e.text).toList;
} else if (_detailStatus.contains(label)) {
manga.status = parseStatus(valueElement.text, statusList);
}
}
String cleanChapterName(String name, String mangaTitle) {
const chapterString = "Chapter";
const chapterNamePrefix = "";
try {
final initialName = name.replaceFirst(
'$chapterNamePrefix$mangaTitle',
chapterString,
);
final parts = initialName.split(':');
if (parts.isEmpty) return name;
final firstPart = parts[0].trim();
if (parts.length == 1) return firstPart;
final secondPart = parts.sublist(1).join(':').trim();
return firstPart == secondPart ? firstPart : "$firstPart: $secondPart";
} catch (e) {
return name;
}
}
const _detailAuthor = {
'author(s)',
'autor(es)',
'auteur(s)',
'著作',
'yazar(lar)',
'mangaka(lar)',
'pengarang/penulis',
'pengarang',
'penulis',
'autor',
'المؤلف',
'перевод',
'autor/autorzy',
};
const _detailArtist = {
'artist(s)',
'artiste(s)',
'sanatçi(lar)',
'artista(s)',
'artist(s)/ilustrator',
'الرسام',
'seniman',
'rysownik/rysownicy',
'artista',
};
const _detailGenre = {
'categories',
'categorías',
'catégories',
'ジャンル',
'kategoriler',
'categorias',
'kategorie',
'التصنيفات',
'жанр',
'kategori',
'tagi',
'género',
};
const _detailStatus = {
'status',
'statut',
'estado',
'状態',
'durum',
'الحالة',
'статус',
};
const statusList = [
{
// Ongoing Statuses (0)
'ongoing': 0,
'مستمرة': 0,
'en cours': 0,
'em lançamento': 0,
'prace w toku': 0,
'ativo': 0,
'em andamento': 0,
'activo': 0,
// Complete Statuses (1)
'complete': 1,
'مكتملة': 1,
'complet': 1,
'completo': 1,
'zakończone': 1,
'concluído': 1,
'finalizado': 1,
// Dropped Statuses (3)
'dropped': 3,
},
];
} }
MMRCMS main(MSource source) { MMRCMS main(MSource source) {