Blogger (extractor): Fix extractor (#380)

* Blogger (extractor): Fix extractor

* refactor with helpers

---------

Co-authored-by: Cuong-Tran <16017808+cuong-tran@users.noreply.github.com>
This commit is contained in:
WebDitto
2026-06-02 15:12:37 -03:00
committed by GitHub
parent 7be277b6c9
commit 96415b14ea

View File

@@ -12,23 +12,18 @@ import okhttp3.OkHttpClient
import okhttp3.RequestBody.Companion.toRequestBody
class BloggerExtractor(private val client: OkHttpClient) {
suspend fun videosFromUrl(url: String, headers: Headers, suffix: String = ""): List<Video> {
val body = client.newCall(GET(url, headers))
.awaitSuccess().bodyString()
val body = client.newCall(GET(url, headers)).awaitSuccess().bodyString()
var videos = getStreamVideos(body, headers, suffix)
if (videos.isEmpty()) {
videos = getRpcVideos(url, body, headers, suffix)
}
return videos
return getStreamVideos(body, headers, suffix)
.ifEmpty { getRpcVideos(url, body, headers, suffix) }
}
private fun getStreamVideos(body: String, headers: Headers, suffix: String = ""): List<Video> {
if (body.contains("errorContainer")) return emptyList()
return body
.takeIf { !it.contains("errorContainer") }
.let { it ?: return emptyList() }
.substringAfter("\"streams\":[", "")
.substringBefore("]")
.split("},")
@@ -37,14 +32,7 @@ class BloggerExtractor(private val client: OkHttpClient) {
.takeIf(String::isNotBlank)
?: return@mapNotNull null
val format = it.substringAfter("\"format_id\":").substringBefore('}')
val quality = when (format) {
"7" -> "240p"
"18" -> "360p"
"22" -> "720p"
"37" -> "1080p"
else -> "Unknown"
}
Video(videoUrl, "Blogger - $quality $suffix".trimEnd(), videoUrl, headers)
Video(videoUrl, "Blogger - ${qualityFromFormat(format)} $suffix".trimEnd(), videoUrl, headers)
}
}
@@ -58,67 +46,78 @@ class BloggerExtractor(private val client: OkHttpClient) {
headers: Headers,
suffix: String = "",
): List<Video> {
val token = url.toHttpUrl().queryParameter("token") ?: return emptyList()
val token = url.toHttpUrl().queryParameter("token")?.takeIf(String::isNotBlank) ?: return emptyList()
val fSid = body.substringAfter("FdrFJe\":\"").substringBefore("\"")
val bl = body.substringAfter("cfb2h\":\"").substringBefore("\"")
val reqid = ((System.currentTimeMillis() / 1000L) % 86400L).toString() // Number of seconds of the day
val formSessionId = body.substringAfter("FdrFJe\":\"").substringBefore("\"")
val blogId = body.substringAfter("cfb2h\":\"").substringBefore("\"")
val requestId = ((System.currentTimeMillis() / 1000L) % 86400L).toString()
val rpcUrl = BLOGGER_BASE.toHttpUrl().newBuilder()
.addPathSegments("_/BloggerVideoPlayerUi/data/batchexecute")
.addQueryParameter("rpcids", "WcwnYd")
.addQueryParameter("source-path", "/video.g")
.addQueryParameter("f.sid", formSessionId)
.addQueryParameter("bl", blogId)
.addQueryParameter("hl", "en-US")
.addQueryParameter("_reqid", requestId)
.addQueryParameter("rt", "c")
.build()
.toString()
val rpcUrl =
"https://www.blogger.com/_/BloggerVideoPlayerUi/data/batchexecute?rpcids=WcwnYd&source-path=%2Fvideo.g&f.sid=$fSid&bl=$bl&hl=en-US&_reqid=$reqid&rt=c"
val rpcBody =
"f.req=%5B%5B%5B%22WcwnYd%22%2C%22%5B%5C%22$token%5C%22%2C%5C%22%5C%22%2C0%5D%22%2Cnull%2C%22generic%22%5D%5D%5D&".toRequestBody()
val rpcHeaders = Headers.headersOf(
"accept",
"*/*",
"accept-language",
"en-US,en;q=0.9",
"content-type",
"application/x-www-form-urlencoded;charset=UTF-8",
"priority",
"u=1, i",
"sec-fetch-dest",
"empty",
"sec-fetch-mode",
"cors",
"sec-fetch-site",
"same-origin",
"User-Agent",
headers["User-Agent"] ?: "",
"x-same-domain",
"1",
"Referer",
"https://www.blogger.com/",
"accept", "*/*",
"accept-language", "en-US,en;q=0.9",
"content-type", "application/x-www-form-urlencoded;charset=UTF-8",
"priority", "u=1, i",
"sec-fetch-dest", "empty",
"sec-fetch-mode", "cors",
"sec-fetch-site", "same-origin",
"User-Agent", headers["User-Agent"] ?: "",
"x-same-domain", "1",
"Referer", BLOGGER_BASE,
)
val rpcString = client.newCall(POST(rpcUrl, body = rpcBody, headers = rpcHeaders))
.awaitSuccess().bodyString()
if (!rpcString.contains("https://")) return emptyList()
return rpcString
.substringAfter("[[\\\"", "")
.substringBefore("]]]")
.takeIf { it.contains("https://") }
.let { it ?: return emptyList() }
.let { "\\\"$it]" }
.split("],[")
.mapNotNull {
val videoUrl = it.substringAfter("\\\"", "")
.substringBefore("\\\"")
.takeIf(String::isNotBlank)
?.parseAs<String> { "\"$it\"" }
?.parseAs<String> { "\"$it\"" } // Yes, need decode twice
?.let(::decodeDoubleEscapedJson)
?: return@mapNotNull null
val format = it.substringAfter("[").substringBefore("]")
val quality = when (format) {
"7" -> "240p"
"18" -> "360p"
"22" -> "720p"
"37" -> "1080p"
else -> "Unknown"
}
val quality = qualityFromFormat(format)
Video(videoUrl, "Blogger - $quality $suffix".trimEnd(), videoUrl, headers)
}
}
private fun decodeDoubleEscapedJson(value: String): String? = runCatching {
// The RPC response wraps the URL in JSON double-escaped strings,
// so it needs two decoding passes: escaped -> JSON string -> actual value
val first = "\"$value\"".parseAs<String>()
"\"$first\"".parseAs<String>()
}.getOrNull()
private fun qualityFromFormat(format: String): String = when (format) {
"7" -> "240p"
"18" -> "360p"
"22" -> "720p"
"37" -> "1080p"
else -> "Unknown"
}
companion object {
private const val BLOGGER_BASE = "https://www.blogger.com/"
}
}