php curl多线程

使用curl_multi可以让多个curl请求并发执行(在单线程内以非阻塞方式同时处理多个请求,效果类似多线程)。

1.通过curl_multi_init()创建一个curl批处理的句柄。

2.通过循环,使用curl_init()初始化不同的url。

// Create one easy handle per URL, stored under the same key the URL has in $urls.
foreach ($urls as $k => $url) {
    $curl[$k] = curl_init();
    curl_setopt($curl[$k], CURLOPT_URL, $url);
    // 0 = do not include the response headers in the output.
    curl_setopt($curl[$k], CURLOPT_HEADER, 0);
}

3.使用curl_multi_add_handle()将每个单独的curl添加到批处理curl会话中。

// Register one easy handle with the multi handle $mh.
// NOTE(review): this needs to run once per handle, presumably inside the
// step-2 loop where $k is the current key — confirm when assembling the steps.
curl_multi_add_handle($mh, $curl[$k]);

4.执行批处理curl_multi_exec()

// Receives the number of transfers still in progress from curl_multi_exec().
$running = null;
// Drive the batch handle: pause 10 ms, advance every transfer, and leave
// the loop as soon as no transfer is reported as running.
while (true) {
    usleep(10000);
    curl_multi_exec($mh, $running);
    if (!($running > 0)) {
        break;
    }
}

5.关闭全部句柄curl_multi_remove_handle,curl_multi_close

// Detach every easy handle from the multi handle and close it.
// Iterates the $curl array built in step 2; the loop variable is named $ch
// so it does not shadow the array being walked.
foreach ($curl as $k => $ch) {
    curl_multi_remove_handle($mh, $ch);
    curl_close($ch);
}
// Finally release the multi handle itself.
curl_multi_close($mh);

将这些步骤串起来

/**
 * Fetch several URLs concurrently with the curl_multi API.
 *
 * @param array $urls URLs to request; each handle is stored under the URL's array key.
 * @return array Response bodies keyed by URL. Empty array when $urls is
 *               empty or not an array.
 */
function curl_multi_url($urls) {
    // Nothing to fetch: skip creating any cURL resources.
    if (!is_array($urls) || count($urls) === 0) {
        return array();
    }

    $mh = curl_multi_init();
    $curls = $text = array();

    foreach ($urls as $k => $url) {
        $curls[$k] = curl_init();
        curl_setopt($curls[$k], CURLOPT_URL, $url);
        curl_setopt($curls[$k], CURLOPT_HEADER, 0);
        // Capture the body instead of echoing it; without this option
        // curl_multi_getcontent() has no content to return.
        curl_setopt($curls[$k], CURLOPT_RETURNTRANSFER, 1);
        curl_multi_add_handle($mh, $curls[$k]);
    }

    // NOTE: this loop calls curl_multi_exec() back to back with no wait in
    // between, i.e. it busy-waits until every transfer has finished.
    $running = null;
    do {
        curl_multi_exec($mh, $running);
    } while ($running > 0);

    // Collect each body under its URL, then release the handle.
    foreach ($curls as $k => $curl) {
        $text[$urls[$k]] = curl_multi_getcontent($curl);
        curl_multi_remove_handle($mh, $curl);
        curl_close($curl);
    }
    curl_multi_close($mh);

    return $text;
}

然而,上述代码依旧有问题:上述的do while循环会不停地空转调用curl_multi_exec,导致CPU占用100%。改进如下:

// Start the transfers; repeat while libcurl asks to be called again immediately.
do {
    $mrc = curl_multi_exec($mh, $active);
} while ($active && $mrc === CURLM_CALL_MULTI_PERFORM);

// While transfers are still active, block in curl_multi_select() until there
// is socket activity, then let curl_multi_exec() process it.
while ($active && $mrc === CURLM_OK) {
    if (curl_multi_select($mh) === -1) {
        // select reported a failure: back off briefly instead of spinning.
        // curl_multi_exec() must still run below, otherwise $active and $mrc
        // never change and this loop would never terminate.
        usleep(1000);
    }
    do {
        $mrc = curl_multi_exec($mh, $active);
    } while ($active && $mrc === CURLM_CALL_MULTI_PERFORM);
}

因为$active要等全部url数据接收完毕才变成0(即false),所以这里用到了curl_multi_exec的返回值来判断是否还有数据:当有数据的时候就不停调用curl_multi_exec;暂时没有数据就进入curl_multi_select阶段阻塞等待,新数据一来就可以被唤醒继续执行。这样做的好处是避免了CPU的无谓消耗。

比较完美的实现方式:

/**
 * Fetch several URLs concurrently with the curl_multi API.
 *
 * @param array $urls URLs to request; each handle is stored under the URL's array key.
 * @return array|false Response bodies keyed by URL, containing only the
 *                     transfers for which curl_error() reported nothing;
 *                     false when $urls is not a non-empty array.
 */
function get_thread_url($urls) {
    if (!is_array($urls) || count($urls) === 0) {
        return false;
    }
    $curl = $text = array();
    $handle = curl_multi_init();

    foreach ($urls as $k => $url) {
        $curl[$k] = curl_init();
        curl_setopt($curl[$k], CURLOPT_URL, $url);
        curl_setopt($curl[$k], CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1');
        curl_setopt($curl[$k], CURLOPT_REFERER, $url);
        curl_setopt($curl[$k], CURLOPT_ENCODING, "gzip");
        // Capture the body so curl_multi_getcontent() can return it.
        curl_setopt($curl[$k], CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($curl[$k], CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($curl[$k], CURLOPT_MAXREDIRS, 5);
        curl_setopt($curl[$k], CURLOPT_TIMEOUT, 20);
        curl_setopt($curl[$k], CURLOPT_HEADER, 0);
        // Keep TLS certificate and host-name verification on. The earlier
        // `if (strpos($url, 'https'))` test was falsy for URLs that START
        // with "https" (strpos returns 0), so it only ever fired for URLs
        // with "https" later in the string, where it turned peer verification
        // off. Verifying unconditionally matches what ordinary https URLs
        // already got and avoids silently accepting untrusted certificates.
        curl_setopt($curl[$k], CURLOPT_SSL_VERIFYPEER, true);
        curl_setopt($curl[$k], CURLOPT_SSL_VERIFYHOST, 2);
        curl_multi_add_handle($handle, $curl[$k]);
    }

    // Start the transfers; repeat while libcurl asks to be called again.
    $active = null;
    do {
        $mrc = curl_multi_exec($handle, $active);
    } while ($mrc == CURLM_CALL_MULTI_PERFORM);

    // Block in curl_multi_select() until there is activity, then process it.
    while ($active && $mrc == CURLM_OK) {
        if (curl_multi_select($handle) === -1) {
            // select reported a failure: pause briefly so the loop does not
            // spin at full speed, then still let curl_multi_exec() run.
            usleep(1000);
        }
        do {
            $mrc = curl_multi_exec($handle, $active);
        } while ($mrc == CURLM_CALL_MULTI_PERFORM);
    }

    // Collect bodies of transfers without a reported error, then clean up.
    foreach ($curl as $k => $ch) {
        if (curl_error($ch) == "") {
            $text[$urls[$k]] = (string) curl_multi_getcontent($ch);
        }
        curl_multi_remove_handle($handle, $ch);
        curl_close($ch);
    }
    curl_multi_close($handle);
    return $text;
}
如果您觉得本文对您有用,欢迎捐赠或留言~
微信支付
支付宝

发表评论

您的电子邮箱地址不会被公开。 必填项已用*标注