微信公众号文章采集接口程序

编程入门 行业动态 更新时间:2024-10-24 03:24:49

微信<a href="https://www.elefans.com/category/jswz/34/1769853.html">公众号</a>文章采集接口程序

微信公众号文章采集接口程序

用法:
http://localhost2/gather.php?dir=gather&url=http%3a%2f%2fmp.weixin.qq.com%2fs%3f__biz%3dMjM5Nzk3NjAxMg%3d%3d%26mid%3d2650417216%26idx%3d2%26sn%3da090a3fa0771d94f9489b6913e4e8616%26scene%3d0%23wechat_redirect

dir 采集文件的目录
url 要采集的文章地址,必须先经过 URL 编码(例如 PHP 的 urlencode)再作为参数传入。

<?php
/**
 * Downloads a WeChat official-account article and stores its images locally.
 *
 * fetch() returns an array with these keys, in this order:
 *   - url:     the article URL that was requested
 *   - title:   trimmed text of the page's <title> element, or null when absent
 *   - cover:   "/" . $path . DIRECTORY_SEPARATOR . "<id>.<type>" for the saved
 *              cover image, or null when no cover was found or it could not be saved
 *   - content: inner HTML of the #js_content element with CDN image URLs
 *              rewritten to local paths and "data-src" renamed to "src"
 *
 * NOTE(review): only the URL scheme is validated here. A caller exposing this
 * class to untrusted input should also restrict which hosts may be requested.
 */
class Gather
{
    /**
     * Regex fragment for the path segment the image CDN puts before the image id
     * (e.g. "mmbiz", "mmbiz_jpg", "mmbiz_png", "sz_mmbiz_jpg").
     */
    const CDN_SEGMENT = '[a-z_]*mmbiz(?:_[a-z0-9]+)?';

    private $url;
    private $path;

    /**
     * @param string $url  Article URL (http or https).
     * @param string $path Directory the images are written to; created if missing.
     */
    public function __construct($url, $path)
    {
        $this->url = $url;
        $this->path = $path;
        // Articles may reference many images; do not let the time limit abort the run.
        set_time_limit(0);
    }

    /**
     * Gathers the article passed to the constructor.
     *
     * @return array See the class description for the keys.
     *
     * @throws InvalidArgumentException When the URL is not http(s) or the path is empty.
     * @throws RuntimeException         When the page cannot be downloaded or the
     *                                  target directory cannot be created.
     */
    public function fetch()
    {
        return $this->transform($this->url, $this->path);
    }

    /**
     * Downloads the page, extracts title / cover / content and localizes images.
     */
    private function transform($url, $path)
    {
        // Reject file://, php://, bare local paths, etc. before touching the
        // filesystem: file_get_contents() would otherwise happily read them.
        if (!is_string($url) || !preg_match('#^https?://#i', $url)) {
            throw new InvalidArgumentException('Only http(s) article URLs can be gathered.');
        }
        // An empty path would make every image land in the filesystem root.
        if (!is_string($path) || $path === '') {
            throw new InvalidArgumentException('A target directory is required.');
        }

        $page = file_get_contents($url);
        if ($page === false) {
            throw new RuntimeException("Unable to download article: {$url}");
        }

        // The second is_dir() covers another process creating the directory
        // between our check and our mkdir().
        if (!is_dir($path) && !mkdir($path, 0777, true) && !is_dir($path)) {
            throw new RuntimeException("Unable to create directory: {$path}");
        }

        $title = null;
        if (preg_match('/<title[^>]*>(.*?)<\/title>/is', $page, $m)) {
            $title = trim($m[1]);
        }

        // The cover image URL is published in an inline script variable.
        $cover = null;
        $coverPattern = '/var\s+msg_cdn_url\s*=\s*"(https?:\/\/[^\s"]+\/'
            . self::CDN_SEGMENT
            . '\/([^\s"\/]+)\/\d+(?:\?[^\s"]*)?)"/';
        if (preg_match($coverPattern, $page, $m)) {
            $cover = $this->saveImage($m[1], $m[2], $path);
        }

        // The lazy match stops at the first closing </div>, so a <div> nested
        // inside the article body truncates the captured content.
        $content = '';
        if (preg_match('/<div\b[^>]*\bid="js_content"[^>]*>(.*?)<\/div>/is', $page, $m)) {
            $content = $this->localizeImages($m[1], $path);
        }

        return array(
            'url' => $url,
            'title' => $title,
            'cover' => $cover,
            'content' => trim($content),
        );
    }

    /**
     * Downloads every CDN image referenced by data-src="..." or
     * background-image:url(...) and rewrites the reference to the local copy.
     * References whose image cannot be saved keep their remote URL.
     */
    private function localizeImages($html, $path)
    {
        // Group 1: the attribute/CSS prefix, group 2: the image URL, group 3: the image id.
        // The lookahead ends the URL at the closing quote / parenthesis (or &quot;)
        // so the lazy query part never swallows the delimiter.
        $pattern = '/(data-src="|background-image\s*:\s*url\s*\(\s*(?:["\']|&quot;)?)'
            . '(https?:\/\/[^\s"\'()<>]+\/' . self::CDN_SEGMENT
            . '\/([^\s"\'()<>\/]+)\/\d+(?:\?[^\s"\'()<>]*?)?)'
            . '(?=&quot;|["\'()<>\s]|$)/i';

        $saved = array(); // raw URL => local path (or null on failure); avoids re-downloading duplicates
        $rewritten = preg_replace_callback(
            $pattern,
            function ($m) use (&$saved, $path) {
                $rawUrl = $m[2];
                if (!array_key_exists($rawUrl, $saved)) {
                    // Attribute values are HTML-escaped ("&amp;"); decode before requesting.
                    $saved[$rawUrl] = $this->saveImage(
                        html_entity_decode($rawUrl, ENT_QUOTES, 'UTF-8'),
                        $m[3],
                        $path
                    );
                }

                return $saved[$rawUrl] === null ? $m[0] : $m[1] . $saved[$rawUrl];
            },
            $html
        );

        // preg_replace_callback() returns null on a PCRE error; keep the HTML untouched then.
        if ($rewritten !== null) {
            $html = $rewritten;
        }

        // Lazy-loaded images carry their URL in data-src; expose it as a real src.
        return str_replace('data-src', 'src', $html);
    }

    /**
     * Downloads one image and writes it to $path as "<id>.<type>".
     *
     * @return string|null "/" . $path . DIRECTORY_SEPARATOR . file name, or null
     *                     when the download or the write failed.
     */
    private function saveImage($imageUrl, $imageId, $path)
    {
        $bytes = file_get_contents($imageUrl);
        if ($bytes === false || $bytes === '') {
            return null;
        }

        // Inspect the bytes already in memory instead of downloading the image again.
        $info = getimagesizefromstring($bytes);
        $type = ($info !== false && isset($info['mime']))
            ? str_replace('image/', '', $info['mime'])
            : 'bin';

        // The id comes from remote content; keep only characters that are safe in a file name.
        $safeId = preg_replace('/[^A-Za-z0-9_\-]/', '', $imageId);
        if ($safeId === null || $safeId === '') {
            $safeId = md5($imageUrl);
        }

        $fileName = $path . DIRECTORY_SEPARATOR . $safeId . '.' . $type;
        if (file_put_contents($fileName, $bytes) === false) {
            return null;
        }

        return '/' . $fileName;
    }
}
// Entry point: gather the article given by ?url=... into the directory given by
// ?dir=... and print the result as JSON.
//
// The parameters are read explicitly instead of via extract($_GET), which would
// let a request overwrite any variable in this scope.
$url = (isset($_GET['url']) && is_string($_GET['url'])) ? trim($_GET['url']) : '';
$dir = (isset($_GET['dir']) && is_string($_GET['dir'])) ? trim($_GET['dir']) : '';

// Only relative directories without ".." segments are accepted, so a request
// cannot make the script write outside the directory it runs in.
$dirIsSafe = $dir !== ''
    && $dir[0] !== '/'
    && strpos($dir, '..') === false
    && preg_match('#^[A-Za-z0-9_.\-/]+$#', $dir) === 1;

if (!headers_sent()) {
    header('Content-Type: application/json; charset=utf-8');
}

if (!preg_match('#^https?://#i', $url)) {
    http_response_code(400);
    $result = array('error' => 'Parameter "url" must be an http(s) URL.');
} elseif (!$dirIsSafe) {
    http_response_code(400);
    $result = array('error' => 'Parameter "dir" must be a relative directory name.');
} else {
    try {
        $g = new Gather($url, $dir);
        $result = $g->fetch();
    } catch (Exception $e) {
        http_response_code(500);
        $result = array('error' => $e->getMessage());
    }
}

$json = json_encode($result);
if ($json === false) {
    // json_encode() fails e.g. on invalid UTF-8 in the page; report that
    // instead of sending an empty body.
    http_response_code(500);
    $json = json_encode(array('error' => 'JSON encoding failed (code ' . json_last_error() . ').'));
}
echo $json;

更多推荐

微信公众号文章采集接口程序

本文发布于:2024-03-11 15:04:38,感谢您对本站的认可!
本文链接:https://www.elefans.com/category/jswz/34/1729228.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
本文标签:公众   接口   程序   文章

发布评论

评论列表 (有 0 条评论)
草根站长

>www.elefans.com

编程频道|电子爱好者 - 技术资讯及电子产品介绍!