php curl多线程
使用curl_multi
可以并发执行多个curl请求(习惯上称为“多线程”,实际是在单个线程内以非阻塞方式并发处理)
。
1.通过curl_multi_init()
创建一个curl批处理的句柄。
2.通过循环,使用curl_init()
初始化不同的url。
// Create one easy handle per URL and store it under the same key as in $urls.
// The loop variable must be $k because the body indexes $curl with $k.
foreach ($urls as $k => $url) {
    $curl[$k] = curl_init();
    // Target URL for this handle.
    curl_setopt($curl[$k], CURLOPT_URL, $url);
    // Do not include response headers in the output.
    curl_setopt($curl[$k], CURLOPT_HEADER, 0);
}
3.使用curl_multi_add_handle()
将每个单独的curl添加到批处理curl会话中。
// Attach the easy handle $curl[$k] to the multi handle $mh.
// NOTE(review): presumably meant to run once per $k, i.e. inside the loop that creates $curl[$k] - confirm.
curl_multi_add_handle($mh, $curl[$k]);
4.执行批处理curl_multi_exec()
。
// Filled in by curl_multi_exec() on every call.
$running = null;
// Run the batch handle until $running is no longer greater than zero.
while (true) {
    // Pause 10 ms before each poll.
    usleep(10000);
    curl_multi_exec($mh, $running);
    if (!($running > 0)) {
        break;
    }
}
5.关闭全部句柄curl_multi_remove_handle
,curl_multi_close
。
// Detach every easy handle from the multi handle and close it.
// Iterates $curl (the array filled when the handles were created) and uses a
// separate loop variable so the array itself is not overwritten.
foreach ($curl as $k => $ch) {
    curl_multi_remove_handle($mh, $ch);
    curl_close($ch);
}
// Finally release the multi handle itself.
curl_multi_close($mh);
将这些步骤串起来
/**
 * Request several URLs through a single curl multi handle.
 *
 * @param array $urls URLs to request; keys are used to pair each handle with its URL.
 * @return array Map of URL => content reported by curl_multi_getcontent().
 */
function curl_multi_url($urls) {
    $mh = curl_multi_init();
    $curls = $text = array();
    foreach ($urls as $k => $url) {
        $curls[$k] = curl_init();
        curl_setopt($curls[$k], CURLOPT_URL, $url);
        curl_setopt($curls[$k], CURLOPT_HEADER, 0);
        // Without RETURNTRANSFER the body is written straight to output and
        // curl_multi_getcontent() has nothing to hand back.
        curl_setopt($curls[$k], CURLOPT_RETURNTRANSFER, 1);
        curl_multi_add_handle($mh, $curls[$k]);
    }
    $running = null;
    // NOTE: this loop calls curl_multi_exec() back-to-back without waiting,
    // so it keeps the CPU busy for as long as transfers are running.
    do {
        curl_multi_exec($mh, $running);
    } while ($running > 0);
    foreach ($curls as $k => $curl) {
        $text[$urls[$k]] = curl_multi_getcontent($curl);
        curl_multi_remove_handle($mh, $curl);
        curl_close($curl);
    }
    curl_multi_close($mh);
    // Hand the collected bodies back to the caller.
    return $text;
}
然而,上述代码依旧有问题:其中的do while循环会不停空转,导致CPU占用100%。改进如下:
// Start the transfers; repeat while curl_multi_exec() reports
// CURLM_CALL_MULTI_PERFORM and something is still active.
do {
    $mrc = curl_multi_exec($mh, $active);
} while ($active && $mrc === CURLM_CALL_MULTI_PERFORM);

while ($active && $mrc === CURLM_OK) {
    // Wait for activity on any handle. If select fails (-1), back off briefly
    // instead of retrying immediately, then still call curl_multi_exec() so
    // $active can change and the loop can terminate.
    if (curl_multi_select($mh) === -1) {
        usleep(100);
    }
    do {
        $mrc = curl_multi_exec($mh, $active);
    } while ($active && $mrc === CURLM_CALL_MULTI_PERFORM);
}
因为$active
要等全部url数据接收完毕才变成false
,所以这里用到了curl_multi_exec
的返回值判断是否还有数据,当有数据的时候就不停调用curl_multi_exec
,暂时没有数据就进入select
阶段,新数据一来就可以被唤醒继续执行。这里的好处就是CPU的无谓消耗没有了。
比较完美的实现方式:
/**
 * Fetch a list of URLs concurrently with the curl_multi API.
 *
 * @param array $urls URLs to request; keys are used to pair each handle with its URL.
 * @return array|false Map of URL => response body for every transfer whose handle
 *                     reports no curl error; false when $urls is not a non-empty array.
 */
function get_thread_url($urls) {
    if (!is_array($urls) || count($urls) == 0) {
        return false;
    }
    $curl = $text = array();
    $handle = curl_multi_init();
    foreach ($urls as $k => $url) {
        // curl_init($url) already sets CURLOPT_URL for this handle.
        $curl[$k] = curl_init($url);
        curl_setopt($curl[$k], CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1');
        curl_setopt($curl[$k], CURLOPT_REFERER, $url);
        curl_setopt($curl[$k], CURLOPT_ENCODING, "gzip");
        // Return the body instead of printing it, so curl_multi_getcontent() can read it.
        curl_setopt($curl[$k], CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($curl[$k], CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($curl[$k], CURLOPT_MAXREDIRS, 5);
        curl_setopt($curl[$k], CURLOPT_TIMEOUT, 20);
        curl_setopt($curl[$k], CURLOPT_HEADER, 0);
        // strpos() returns 0 for a URL that starts with "https", which is falsy,
        // so the position has to be compared against 0 explicitly.
        if (strpos($url, 'https') === 0) {
            // NOTE(review): disabling peer verification turns off certificate
            // validation for these requests - confirm this is acceptable.
            curl_setopt($curl[$k], CURLOPT_SSL_VERIFYPEER, false);
            curl_setopt($curl[$k], CURLOPT_SSL_VERIFYHOST, 2);
        }
        curl_multi_add_handle($handle, $curl[$k]);
    }
    $active = null;
    do {
        $mrc = curl_multi_exec($handle, $active);
    } while ($mrc == CURLM_CALL_MULTI_PERFORM);
    while ($active && $mrc == CURLM_OK) {
        // curl_multi_select() waits for activity; when it fails (-1) it returns
        // at once, so pause briefly to avoid spinning.
        if (curl_multi_select($handle) == -1) {
            usleep(100);
        }
        do {
            $mrc = curl_multi_exec($handle, $active);
        } while ($mrc == CURLM_CALL_MULTI_PERFORM);
    }
    foreach ($curl as $k => $v) {
        // Only keep bodies of transfers that report no error.
        if (curl_error($v) == "") {
            $text[$urls[$k]] = (string) curl_multi_getcontent($v);
        }
        curl_multi_remove_handle($handle, $v);
        curl_close($v);
    }
    curl_multi_close($handle);
    return $text;
}
如果您觉得本文对您有用,欢迎捐赠或留言~
- 本博客所有文章除特别声明外,均可转载和分享,转载请注明出处!
- 本文地址:https://www.leevii.com/?p=409