• 坚守岗位守护绿城美丽与整洁 2018-12-05
  • php

    php

    PHP解析百度图片搜索结果json中objURL图片原始地址函数

    孤魂 发表了文章 ? 0 个评论 ? 3856 次浏览 ? 2016-11-17 09:01 ? 来自相关话题

    <?php
    $str = 'ippr_z2C$qAzdH3FAzdH3Fojgojg_z&e3Bf5f5_z&e3Bv54AzdH3FrAzdH3Fda8a8a8nAzdH3Fda8a8a8n8m9bdc-8a0abn8cdb_z&e3B3r2';
    /**
     * Decode an obfuscated Baidu image-search "objURL" value into a plain URL.
     *
     * Three multi-character tokens stand for ":", "." and "/"; every character
     * in a-w / 0-9 is swapped through a fixed substitution table. Characters
     * outside the table (for example "-") pass through unchanged.
     *
     * @param string $k Encoded objURL value.
     * @return string Decoded URL, or $k itself when it is empty or already contains "http".
     */
    function baidtu_uncomplie($k) {
        static $map = array(
            'w' => "a", 'k' => "b", 'v' => "c", '1' => "d", 'j' => "e", 'u' => "f",
            '2' => "g", 'i' => "h", 't' => "i", '3' => "j", 'h' => "k", 's' => "l",
            '4' => "m", 'g' => "n", "5" => "o", 'r' => "p", 'q' => "q", "6" => "r",
            'f' => "s", 'p' => "t", "7" => "u", 'e' => "v", 'o' => "w", "8" => "1",
            'd' => "2", 'n' => "3", "9" => "4", 'c' => "5", 'm' => "6", "0" => "7",
            'b' => "8", 'l' => "9", 'a' => "0",
            '_z2C$q' => ":", '_z&e3B' => ".", 'AzdH3F' => "/",
        );

        // Compare against false: strpos() yields 0 for input that starts with
        // "http", and a bare truthiness check would treat that as "not found".
        if (!$k || strpos($k, "http") !== false) {
            return $k;
        }

        // strtr() tries the longest key first at each offset and never rescans
        // its own output, so a single pass handles tokens and single characters.
        return strtr($k, $map);
    }
    print_r(baidtu_uncomplie($str)); 查看全部
    <?php
    $str = 'ippr_z2C$qAzdH3FAzdH3Fojgojg_z&e3Bf5f5_z&e3Bv54AzdH3FrAzdH3Fda8a8a8nAzdH3Fda8a8a8n8m9bdc-8a0abn8cdb_z&e3B3r2';
    /**
     * Decode an obfuscated Baidu image-search "objURL" value into a plain URL.
     *
     * Three multi-character tokens stand for ":", "." and "/"; every character
     * in a-w / 0-9 is swapped through a fixed substitution table. Characters
     * outside the table (for example "-") pass through unchanged.
     *
     * @param string $k Encoded objURL value.
     * @return string Decoded URL, or $k itself when it is empty or already contains "http".
     */
    function baidtu_uncomplie($k) {
        static $map = array(
            'w' => "a", 'k' => "b", 'v' => "c", '1' => "d", 'j' => "e", 'u' => "f",
            '2' => "g", 'i' => "h", 't' => "i", '3' => "j", 'h' => "k", 's' => "l",
            '4' => "m", 'g' => "n", "5" => "o", 'r' => "p", 'q' => "q", "6" => "r",
            'f' => "s", 'p' => "t", "7" => "u", 'e' => "v", 'o' => "w", "8" => "1",
            'd' => "2", 'n' => "3", "9" => "4", 'c' => "5", 'm' => "6", "0" => "7",
            'b' => "8", 'l' => "9", 'a' => "0",
            '_z2C$q' => ":", '_z&e3B' => ".", 'AzdH3F' => "/",
        );

        // Compare against false: strpos() yields 0 for input that starts with
        // "http", and a bare truthiness check would treat that as "not found".
        if (!$k || strpos($k, "http") !== false) {
            return $k;
        }

        // strtr() tries the longest key first at each offset and never rescans
        // its own output, so a single pass handles tokens and single characters.
        return strtr($k, $map);
    }
    print_r(baidtu_uncomplie($str));

    PHP通过pthreads扩展实现真正的多线程采集

    孤魂 发表了文章 ? 0 个评论 ? 2011 次浏览 ? 2015-12-25 09:11 ? 来自相关话题

    最近在做自己项目的采集功能时,一直使用 PHP cURL 进行采集,并通过命令行执行 PHP 文件,解决了 PHP 运行超时的问题,但这样只能单线程采集。最近找到了使用 pthreads 实现多线程采集的方法,安装方法这里就不再详细说明了。如果你使用 phpStudy 套件,需要注意以下三点:一是选择正确的版本,PHP 5.x 只能使用 2.09 以下版本的 pthreads;二是需要将 php_pthreads.dll 放在 ext 目录,然后在 php.ini 文件中加载此文件;三是需要将 pthreadVC2.dll 分别复制到 ./PHPa/ 目录和 ./Apache/bin/ 目录。下面分享一下我的采集源码。<?php
    set_time_limit(0);

    /**
     * Worker thread that fetches one URL via model_http_curl_get().
     *
     * The fetched result is left in $data for the code that joins the thread.
     */
    class new_thread_run extends Thread
    {
        /** URL this worker fetches. */
        public $url;

        /** Result of model_http_curl_get() for $url; stays unset when $url is empty. */
        public $data;

        public function __construct($url)
        {
            $this->url = $url;
        }

        /**
         * Thread entry point: fetch the URL unless it is empty/falsy.
         */
        public function run()
        {
            $target = $this->url;
            if (!$target) {
                return;
            }

            $this->data = model_http_curl_get($target);
        }
    }
    /**
     * Fetch a batch of URLs, using one worker thread per URL when pthreads is loaded.
     *
     * When the Thread class is unavailable the URLs are fetched one after
     * another with model_http_curl_get().
     *
     * @param array $urls_array URLs to fetch; keys are preserved in the result.
     * @return array Results keyed like $urls_array. In threaded mode an entry is
     *               omitted when join() reports failure. Always an array, even
     *               for empty input.
     */
    function model_thread_result_get($urls_array)
    {
        // Initialised up front so empty input yields array() instead of an undefined variable.
        $variable_data = array();

        if (!class_exists('Thread')) {
            foreach ($urls_array as $key => $value) {
                $variable_data[$key] = model_http_curl_get($value);
            }
            return $variable_data;
        }

        // Start every worker before waiting on any of them so the fetches overlap.
        $thread_array = array();
        foreach ($urls_array as $key => $value) {
            $thread_array[$key] = new new_thread_run($value);
            $thread_array[$key]->start();
        }

        // join() blocks until the worker has finished, so no isRunning() polling is needed.
        foreach ($thread_array as $key => $thread) {
            if ($thread->join()) {
                $variable_data[$key] = $thread->data;
            }
        }

        return $variable_data;
    }
    /**
     * Issue a GET request with cURL and return what curl_exec() produces.
     *
     * Uses a 20-second timeout and a fixed MSIE 7 user agent. TLS peer and
     * host verification are both switched off.
     *
     * @param string $url URL to request.
     * @return mixed Value returned by curl_exec() with CURLOPT_RETURNTRANSFER enabled.
     */
    function model_http_curl_get($url)
    {
        $handle = curl_init();
        curl_setopt_array($handle, array(
            CURLOPT_URL            => $url,
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_TIMEOUT        => 20,
            CURLOPT_USERAGENT      => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2)',
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_SSL_VERIFYHOST => false,
        ));

        $response = curl_exec($handle);
        curl_close($handle);

        return $response;
    }

    // Real-world example: fetch 50 random Baidu searches and time the whole batch.
    $urls_array = array();
    for ($i = 0; $i < 50; $i++) {
        // Append with []: a plain assignment would leave a single string, not a list of URLs.
        // The scheme is spelled out so cURL receives an absolute URL.
        $urls_array[] = "http://www.baidu.com/s?wd=" . mt_rand(10000, 20000);
    }
    $t = microtime(true);
    $result = model_thread_result_get($urls_array);
    $e = microtime(true);
    echo "多线程:" . ($e - $t) . "\n";
    ?>参考链接:
    http://www.thinkphp.cn/topic/22676.html http://zyan.cc/pthreads/ 查看全部
    最近在做自己项目的采集功能时,一直使用 PHP cURL 进行采集,并通过命令行执行 PHP 文件,解决了 PHP 运行超时的问题,但这样只能单线程采集。最近找到了使用 pthreads 实现多线程采集的方法,安装方法这里就不再详细说明了。如果你使用 phpStudy 套件,需要注意以下三点:一是选择正确的版本,PHP 5.x 只能使用 2.09 以下版本的 pthreads;二是需要将 php_pthreads.dll 放在 ext 目录,然后在 php.ini 文件中加载此文件;三是需要将 pthreadVC2.dll 分别复制到 ./PHPa/ 目录和 ./Apache/bin/ 目录。下面分享一下我的采集源码。
    <?php
    set_time_limit(0);

    /**
     * Worker thread that fetches one URL via model_http_curl_get().
     *
     * The fetched result is left in $data for the code that joins the thread.
     */
    class new_thread_run extends Thread
    {
        /** URL this worker fetches. */
        public $url;

        /** Result of model_http_curl_get() for $url; stays unset when $url is empty. */
        public $data;

        public function __construct($url)
        {
            $this->url = $url;
        }

        /**
         * Thread entry point: fetch the URL unless it is empty/falsy.
         */
        public function run()
        {
            $target = $this->url;
            if (!$target) {
                return;
            }

            $this->data = model_http_curl_get($target);
        }
    }
    /**
     * Fetch a batch of URLs, using one worker thread per URL when pthreads is loaded.
     *
     * When the Thread class is unavailable the URLs are fetched one after
     * another with model_http_curl_get().
     *
     * @param array $urls_array URLs to fetch; keys are preserved in the result.
     * @return array Results keyed like $urls_array. In threaded mode an entry is
     *               omitted when join() reports failure. Always an array, even
     *               for empty input.
     */
    function model_thread_result_get($urls_array)
    {
        // Initialised up front so empty input yields array() instead of an undefined variable.
        $variable_data = array();

        if (!class_exists('Thread')) {
            foreach ($urls_array as $key => $value) {
                $variable_data[$key] = model_http_curl_get($value);
            }
            return $variable_data;
        }

        // Start every worker before waiting on any of them so the fetches overlap.
        $thread_array = array();
        foreach ($urls_array as $key => $value) {
            $thread_array[$key] = new new_thread_run($value);
            $thread_array[$key]->start();
        }

        // join() blocks until the worker has finished, so no isRunning() polling is needed.
        foreach ($thread_array as $key => $thread) {
            if ($thread->join()) {
                $variable_data[$key] = $thread->data;
            }
        }

        return $variable_data;
    }
    /**
     * Issue a GET request with cURL and return what curl_exec() produces.
     *
     * Uses a 20-second timeout and a fixed MSIE 7 user agent. TLS peer and
     * host verification are both switched off.
     *
     * @param string $url URL to request.
     * @return mixed Value returned by curl_exec() with CURLOPT_RETURNTRANSFER enabled.
     */
    function model_http_curl_get($url)
    {
        $handle = curl_init();
        curl_setopt_array($handle, array(
            CURLOPT_URL            => $url,
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_TIMEOUT        => 20,
            CURLOPT_USERAGENT      => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2)',
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_SSL_VERIFYHOST => false,
        ));

        $response = curl_exec($handle);
        curl_close($handle);

        return $response;
    }

    // Real-world example: fetch 50 random Baidu searches and time the whole batch.
    $urls_array = array();
    for ($i = 0; $i < 50; $i++) {
        // Append with []: a plain assignment would leave a single string, not a list of URLs.
        // The scheme is spelled out so cURL receives an absolute URL.
        $urls_array[] = "http://www.baidu.com/s?wd=" . mt_rand(10000, 20000);
    }
    $t = microtime(true);
    $result = model_thread_result_get($urls_array);
    $e = microtime(true);
    echo "多线程:" . ($e - $t) . "\n";
    ?>
    参考链接:

    PHP解析百度图片搜索结果json中objURL图片原始地址函数

    孤魂 发表了文章 ? 0 个评论 ? 3856 次浏览 ? 2016-11-17 09:01 ? 来自相关话题

    <?php
    $str = 'ippr_z2C$qAzdH3FAzdH3Fojgojg_z&e3Bf5f5_z&e3Bv54AzdH3FrAzdH3Fda8a8a8nAzdH3Fda8a8a8n8m9bdc-8a0abn8cdb_z&e3B3r2';
    /**
     * Decode an obfuscated Baidu image-search "objURL" value into a plain URL.
     *
     * Three multi-character tokens stand for ":", "." and "/"; every character
     * in a-w / 0-9 is swapped through a fixed substitution table. Characters
     * outside the table (for example "-") pass through unchanged.
     *
     * @param string $k Encoded objURL value.
     * @return string Decoded URL, or $k itself when it is empty or already contains "http".
     */
    function baidtu_uncomplie($k) {
        static $map = array(
            'w' => "a", 'k' => "b", 'v' => "c", '1' => "d", 'j' => "e", 'u' => "f",
            '2' => "g", 'i' => "h", 't' => "i", '3' => "j", 'h' => "k", 's' => "l",
            '4' => "m", 'g' => "n", "5" => "o", 'r' => "p", 'q' => "q", "6" => "r",
            'f' => "s", 'p' => "t", "7" => "u", 'e' => "v", 'o' => "w", "8" => "1",
            'd' => "2", 'n' => "3", "9" => "4", 'c' => "5", 'm' => "6", "0" => "7",
            'b' => "8", 'l' => "9", 'a' => "0",
            '_z2C$q' => ":", '_z&e3B' => ".", 'AzdH3F' => "/",
        );

        // Compare against false: strpos() yields 0 for input that starts with
        // "http", and a bare truthiness check would treat that as "not found".
        if (!$k || strpos($k, "http") !== false) {
            return $k;
        }

        // strtr() tries the longest key first at each offset and never rescans
        // its own output, so a single pass handles tokens and single characters.
        return strtr($k, $map);
    }
    print_r(baidtu_uncomplie($str)); 查看全部
    <?php
    $str = 'ippr_z2C$qAzdH3FAzdH3Fojgojg_z&e3Bf5f5_z&e3Bv54AzdH3FrAzdH3Fda8a8a8nAzdH3Fda8a8a8n8m9bdc-8a0abn8cdb_z&e3B3r2';
    /**
     * Decode an obfuscated Baidu image-search "objURL" value into a plain URL.
     *
     * Three multi-character tokens stand for ":", "." and "/"; every character
     * in a-w / 0-9 is swapped through a fixed substitution table. Characters
     * outside the table (for example "-") pass through unchanged.
     *
     * @param string $k Encoded objURL value.
     * @return string Decoded URL, or $k itself when it is empty or already contains "http".
     */
    function baidtu_uncomplie($k) {
        static $map = array(
            'w' => "a", 'k' => "b", 'v' => "c", '1' => "d", 'j' => "e", 'u' => "f",
            '2' => "g", 'i' => "h", 't' => "i", '3' => "j", 'h' => "k", 's' => "l",
            '4' => "m", 'g' => "n", "5" => "o", 'r' => "p", 'q' => "q", "6" => "r",
            'f' => "s", 'p' => "t", "7" => "u", 'e' => "v", 'o' => "w", "8" => "1",
            'd' => "2", 'n' => "3", "9" => "4", 'c' => "5", 'm' => "6", "0" => "7",
            'b' => "8", 'l' => "9", 'a' => "0",
            '_z2C$q' => ":", '_z&e3B' => ".", 'AzdH3F' => "/",
        );

        // Compare against false: strpos() yields 0 for input that starts with
        // "http", and a bare truthiness check would treat that as "not found".
        if (!$k || strpos($k, "http") !== false) {
            return $k;
        }

        // strtr() tries the longest key first at each offset and never rescans
        // its own output, so a single pass handles tokens and single characters.
        return strtr($k, $map);
    }
    print_r(baidtu_uncomplie($str));

    PHP通过pthreads扩展实现真正的多线程采集

    孤魂 发表了文章 ? 0 个评论 ? 2011 次浏览 ? 2015-12-25 09:11 ? 来自相关话题

    最近在做自己项目的采集功能时,一直使用 PHP cURL 进行采集,并通过命令行执行 PHP 文件,解决了 PHP 运行超时的问题,但这样只能单线程采集。最近找到了使用 pthreads 实现多线程采集的方法,安装方法这里就不再详细说明了。如果你使用 phpStudy 套件,需要注意以下三点:一是选择正确的版本,PHP 5.x 只能使用 2.09 以下版本的 pthreads;二是需要将 php_pthreads.dll 放在 ext 目录,然后在 php.ini 文件中加载此文件;三是需要将 pthreadVC2.dll 分别复制到 ./PHPa/ 目录和 ./Apache/bin/ 目录。下面分享一下我的采集源码。<?php
    set_time_limit(0);

    /**
     * Worker thread that fetches one URL via model_http_curl_get().
     *
     * The fetched result is left in $data for the code that joins the thread.
     */
    class new_thread_run extends Thread
    {
        /** URL this worker fetches. */
        public $url;

        /** Result of model_http_curl_get() for $url; stays unset when $url is empty. */
        public $data;

        public function __construct($url)
        {
            $this->url = $url;
        }

        /**
         * Thread entry point: fetch the URL unless it is empty/falsy.
         */
        public function run()
        {
            $target = $this->url;
            if (!$target) {
                return;
            }

            $this->data = model_http_curl_get($target);
        }
    }
    /**
     * Fetch a batch of URLs, using one worker thread per URL when pthreads is loaded.
     *
     * When the Thread class is unavailable the URLs are fetched one after
     * another with model_http_curl_get().
     *
     * @param array $urls_array URLs to fetch; keys are preserved in the result.
     * @return array Results keyed like $urls_array. In threaded mode an entry is
     *               omitted when join() reports failure. Always an array, even
     *               for empty input.
     */
    function model_thread_result_get($urls_array)
    {
        // Initialised up front so empty input yields array() instead of an undefined variable.
        $variable_data = array();

        if (!class_exists('Thread')) {
            foreach ($urls_array as $key => $value) {
                $variable_data[$key] = model_http_curl_get($value);
            }
            return $variable_data;
        }

        // Start every worker before waiting on any of them so the fetches overlap.
        $thread_array = array();
        foreach ($urls_array as $key => $value) {
            $thread_array[$key] = new new_thread_run($value);
            $thread_array[$key]->start();
        }

        // join() blocks until the worker has finished, so no isRunning() polling is needed.
        foreach ($thread_array as $key => $thread) {
            if ($thread->join()) {
                $variable_data[$key] = $thread->data;
            }
        }

        return $variable_data;
    }
    /**
     * Issue a GET request with cURL and return what curl_exec() produces.
     *
     * Uses a 20-second timeout and a fixed MSIE 7 user agent. TLS peer and
     * host verification are both switched off.
     *
     * @param string $url URL to request.
     * @return mixed Value returned by curl_exec() with CURLOPT_RETURNTRANSFER enabled.
     */
    function model_http_curl_get($url)
    {
        $handle = curl_init();
        curl_setopt_array($handle, array(
            CURLOPT_URL            => $url,
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_TIMEOUT        => 20,
            CURLOPT_USERAGENT      => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2)',
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_SSL_VERIFYHOST => false,
        ));

        $response = curl_exec($handle);
        curl_close($handle);

        return $response;
    }

    // Real-world example: fetch 50 random Baidu searches and time the whole batch.
    $urls_array = array();
    for ($i = 0; $i < 50; $i++) {
        // Append with []: a plain assignment would leave a single string, not a list of URLs.
        // The scheme is spelled out so cURL receives an absolute URL.
        $urls_array[] = "http://www.baidu.com/s?wd=" . mt_rand(10000, 20000);
    }
    $t = microtime(true);
    $result = model_thread_result_get($urls_array);
    $e = microtime(true);
    echo "多线程:" . ($e - $t) . "\n";
    ?>参考链接:
    http://www.thinkphp.cn/topic/22676.html http://zyan.cc/pthreads/ 查看全部
    最近在做自己项目的采集功能时,一直使用 PHP cURL 进行采集,并通过命令行执行 PHP 文件,解决了 PHP 运行超时的问题,但这样只能单线程采集。最近找到了使用 pthreads 实现多线程采集的方法,安装方法这里就不再详细说明了。如果你使用 phpStudy 套件,需要注意以下三点:一是选择正确的版本,PHP 5.x 只能使用 2.09 以下版本的 pthreads;二是需要将 php_pthreads.dll 放在 ext 目录,然后在 php.ini 文件中加载此文件;三是需要将 pthreadVC2.dll 分别复制到 ./PHPa/ 目录和 ./Apache/bin/ 目录。下面分享一下我的采集源码。
    <?php
    set_time_limit(0);

    /**
     * Worker thread that fetches one URL via model_http_curl_get().
     *
     * The fetched result is left in $data for the code that joins the thread.
     */
    class new_thread_run extends Thread
    {
        /** URL this worker fetches. */
        public $url;

        /** Result of model_http_curl_get() for $url; stays unset when $url is empty. */
        public $data;

        public function __construct($url)
        {
            $this->url = $url;
        }

        /**
         * Thread entry point: fetch the URL unless it is empty/falsy.
         */
        public function run()
        {
            $target = $this->url;
            if (!$target) {
                return;
            }

            $this->data = model_http_curl_get($target);
        }
    }
    /**
     * Fetch a batch of URLs, using one worker thread per URL when pthreads is loaded.
     *
     * When the Thread class is unavailable the URLs are fetched one after
     * another with model_http_curl_get().
     *
     * @param array $urls_array URLs to fetch; keys are preserved in the result.
     * @return array Results keyed like $urls_array. In threaded mode an entry is
     *               omitted when join() reports failure. Always an array, even
     *               for empty input.
     */
    function model_thread_result_get($urls_array)
    {
        // Initialised up front so empty input yields array() instead of an undefined variable.
        $variable_data = array();

        if (!class_exists('Thread')) {
            foreach ($urls_array as $key => $value) {
                $variable_data[$key] = model_http_curl_get($value);
            }
            return $variable_data;
        }

        // Start every worker before waiting on any of them so the fetches overlap.
        $thread_array = array();
        foreach ($urls_array as $key => $value) {
            $thread_array[$key] = new new_thread_run($value);
            $thread_array[$key]->start();
        }

        // join() blocks until the worker has finished, so no isRunning() polling is needed.
        foreach ($thread_array as $key => $thread) {
            if ($thread->join()) {
                $variable_data[$key] = $thread->data;
            }
        }

        return $variable_data;
    }
    /**
     * Issue a GET request with cURL and return what curl_exec() produces.
     *
     * Uses a 20-second timeout and a fixed MSIE 7 user agent. TLS peer and
     * host verification are both switched off.
     *
     * @param string $url URL to request.
     * @return mixed Value returned by curl_exec() with CURLOPT_RETURNTRANSFER enabled.
     */
    function model_http_curl_get($url)
    {
        $handle = curl_init();
        curl_setopt_array($handle, array(
            CURLOPT_URL            => $url,
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_TIMEOUT        => 20,
            CURLOPT_USERAGENT      => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2)',
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_SSL_VERIFYHOST => false,
        ));

        $response = curl_exec($handle);
        curl_close($handle);

        return $response;
    }

    // Real-world example: fetch 50 random Baidu searches and time the whole batch.
    $urls_array = array();
    for ($i = 0; $i < 50; $i++) {
        // Append with []: a plain assignment would leave a single string, not a list of URLs.
        // The scheme is spelled out so cURL receives an absolute URL.
        $urls_array[] = "http://www.baidu.com/s?wd=" . mt_rand(10000, 20000);
    }
    $t = microtime(true);
    $result = model_thread_result_get($urls_array);
    $e = microtime(true);
    echo "多线程:" . ($e - $t) . "\n";
    ?>
    参考链接:

  • 坚守岗位守护绿城美丽与整洁 2018-12-05