公众号文章采集接口程序
微信公众号文章采集接口程序
用法:
http://localhost2/gather.php?dir=gather&url=http%3a%2f%2fmp.weixin.qq.com%2fs%3f__biz%3dMjM5Nzk3NjAxMg%3d%3d%26mid%3d2650417216%26idx%3d2%26sn%3da090a3fa0771d94f9489b6913e4e8616%26scene%3d0%23wechat_redirect
dir 采集文件的目录
url 文章地址,必须先经过 URL 编码(urlencode)再传入。
<?php
/**
 * Fetches a WeChat official-account article and mirrors its images locally.
 *
 * fetch() downloads the page, extracts the title, the cover image
 * (the `msg_cdn_url` JavaScript variable) and the article body
 * (the `<div id="js_content">` element), stores every referenced "mmbiz"
 * CDN image in the target directory and rewrites the body so that it points
 * at the local copies.
 */
class Gather
{
    /** @var string Address of the article to fetch. */
    private $url;

    /** @var string Directory that receives the downloaded images. */
    private $path;

    /**
     * @param string $url  Article address; anything file_get_contents() can read.
     * @param string $path Directory for the downloaded images; created when missing.
     */
    public function __construct($url, $path)
    {
        $this->url = $url;
        $this->path = $path;
        // An article may reference many images, so lift the execution time limit.
        set_time_limit(0);
    }

    /**
     * Runs the collection for the configured article.
     *
     * @return array Map with the keys `url`, `title`, `cover` and `content`.
     *               `cover` is the local path of the cover image, or '' when
     *               the page has none or it could not be stored.
     *
     * @throws RuntimeException When the article cannot be downloaded or the
     *                          target directory cannot be created.
     */
    public function fetch()
    {
        return $this->transform($this->url, $this->path);
    }

    /**
     * Downloads the article and assembles the result map.
     *
     * @param string $url  Article address.
     * @param string $path Directory for the downloaded images.
     *
     * @return array See fetch().
     *
     * @throws RuntimeException See fetch().
     */
    private function transform($url, $path)
    {
        $this->ensureDirectory($path);

        $html = file_get_contents($url);
        if ($html === false) {
            throw new RuntimeException("Unable to download article: $url");
        }

        $data = array('url' => $url, 'title' => '', 'cover' => '', 'content' => '');

        // Lazy + dot-all so a title spread over several lines is still captured.
        if (preg_match('/<title[^>]*>(.*?)<\/title>/is', $html, $match) === 1) {
            $data['title'] = trim($match[1]);
        }

        // Group 1 is the cover URL, group 2 the path segment used as file name.
        // Accepts http and https, every mmbiz* folder, and an optional query.
        $coverPattern = '/var\s+msg_cdn_url\s*=\s*"(https?:\/\/[^\s"]+\/(?:sz_)?mmbiz(?:_[a-z]+)?'
            . '\/([^\s\/"]+)\/\d+(?:\?[^\s"]*)?)"/i';
        if (preg_match($coverPattern, $html, $match) === 1) {
            $cover = $this->saveImage($match[1], $match[2], $path);
            if ($cover !== null) {
                $data['cover'] = $cover;
            }
        }

        // NOTE: the lazy match stops at the first closing </div>, so a body that
        // itself contains <div> elements is cut short at the first nested one.
        if (preg_match('/<div\s[^>]*id="js_content"[^>]*>(.*?)<\/div>/is', $html, $match) === 1) {
            $body = $this->localizeImages($match[1], $path);
            // The page keeps image addresses in data-src; expose them as src.
            $data['content'] = trim(str_replace('data-src', 'src', $body));
        }

        return $data;
    }

    /**
     * Creates the image directory (including parents) when it does not exist.
     *
     * @param string $path Directory to create.
     *
     * @throws RuntimeException When the directory cannot be created.
     */
    private function ensureDirectory($path)
    {
        if (is_dir($path)) {
            return;
        }
        // Re-check after a failed mkdir(): the directory may have appeared in
        // between, in which case there is nothing to report.
        if (!mkdir($path, 0777, true) && !is_dir($path)) {
            throw new RuntimeException("Unable to create directory: $path");
        }
    }

    /**
     * Stores every mmbiz CDN image referenced by the body and points the body
     * at the local copies.
     *
     * Images are looked up in `data-src="..."` attributes and in
     * `background-image: url(...)` declarations. An image that cannot be
     * downloaded or stored keeps its remote address.
     *
     * @param string $content Article body HTML.
     * @param string $path    Directory for the downloaded images.
     *
     * @return string Body with the stored images' URLs replaced by local paths.
     */
    private function localizeImages($content, $path)
    {
        // Group 1 is the full image URL (query included), group 2 the path
        // segment used as file name. Only http/https is accepted so markup in
        // the fetched page cannot make this read other stream wrappers.
        $pattern = '/(?:data-src="|background-image\s*:\s*url\s*\(\s*(?:&quot;|["\'])?)'
            . '(https?:\/\/[^\s"\'()<>]+?\/(?:sz_)?mmbiz(?:_[a-z]+)?\/([^\s\/"\'()<>?]+)\/\d+'
            . '(?:\?(?:(?!&quot;)[^\s"\'()<>])*)?)/i';

        if (!preg_match_all($pattern, $content, $matches, PREG_SET_ORDER)) {
            return $content;
        }

        $replacements = array();
        foreach ($matches as $match) {
            list(, $remote, $name) = $match;
            if (isset($replacements[$remote])) {
                continue; // the same image is referenced more than once
            }
            // Attribute values are HTML-encoded (&amp;); decode before requesting.
            $local = $this->saveImage(html_entity_decode($remote, ENT_QUOTES, 'UTF-8'), $name, $path);
            if ($local !== null) {
                $replacements[$remote] = $local;
            }
        }

        if (!$replacements) {
            return $content;
        }

        // strtr() tries the longest key first and never rescans its own output,
        // so a URL that is a prefix of another one cannot corrupt it.
        return strtr($content, $replacements);
    }

    /**
     * Downloads one image and writes it to "<path>/<name>.<type>".
     *
     * The type is taken from the MIME type detected in the downloaded bytes,
     * so the image is fetched only once.
     *
     * @param string $remoteUrl Address of the image.
     * @param string $name      File name without extension.
     * @param string $path      Directory for the downloaded images.
     *
     * @return string|null Local path prefixed with "/", or null when the image
     *                     could not be downloaded, recognised or written.
     */
    private function saveImage($remoteUrl, $name, $path)
    {
        $bytes = file_get_contents($remoteUrl);
        if ($bytes === false || $bytes === '') {
            return null;
        }

        $info = getimagesizefromstring($bytes);
        if ($info === false) {
            return null;
        }

        $type = str_replace('image/', '', $info['mime']);
        // basename() keeps the file inside $path whatever the name contains.
        $fileName = $path . DIRECTORY_SEPARATOR . basename($name) . ".$type";
        if (file_put_contents($fileName, $bytes) === false) {
            return null;
        }

        return "/$fileName";
    }
}
// Entry point: gather the article named by the `url` query parameter into the
// directory named by `dir` and print the result as JSON.
//
// The parameters are read explicitly instead of through extract($_GET), which
// would let the query string define or overwrite any variable in this scope.
$url = isset($_GET['url']) && is_string($_GET['url']) ? trim($_GET['url']) : '';
$dir = isset($_GET['dir']) && is_string($_GET['dir']) ? trim($_GET['dir']) : '';

$status = 200;
if (!preg_match('/^https?:\/\//i', $url)) {
    // Without this check the parameter could name a local file or another
    // stream wrapper, because the address is handed to file_get_contents().
    $status = 400;
    $result = array('error' => 'Parameter "url" must be an http(s) address.');
} elseif (!preg_match('/^[A-Za-z0-9_-][A-Za-z0-9._-]*(?:\/[A-Za-z0-9_-][A-Za-z0-9._-]*)*$/', $dir)) {
    // Relative path only; no segment may start with a dot, which rules out
    // "." and ".." and keeps the writes below the script's working directory.
    $status = 400;
    $result = array('error' => 'Parameter "dir" must be a relative directory name.');
} else {
    try {
        $g = new Gather($url, $dir);
        $result = $g->fetch();
    } catch (RuntimeException $e) {
        // Report runtime failures as JSON so the client always gets a parseable body.
        $status = 500;
        $result = array('error' => $e->getMessage());
    }
}

// Headers can only be set while no output has been sent yet.
if (!headers_sent()) {
    header('Content-Type: application/json; charset=utf-8');
    http_response_code($status);
}
echo json_encode($result);
更多推荐
微信公众号文章采集接口程序
发布评论