WordPress.org

Plugin Directory

Changeset 622192


Ignore:
Timestamp:
11/07/12 16:27:14 (18 months ago)
Author:
dllh
Message:

Posterous Importer: Fix a bunch of issues with importing images. We had been pulling in scaled images in a way that wasn't reliable: Posterous sometimes uses an infix of "scaled1000" and other times "scaled.1000", so a simple replacement doesn't work.

Instead of using a regex to pull out a post's images and hoping a simple replacement of the filename works (in many cases it doesn't, because other gobbledegook in the URLs doesn't always stay the same), use the API's media metadata. This seems to work a lot better.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • posterous-importer/branches/v2api/posterous.php

    r621812 r622192  
    6666        parent::__construct(); 
    6767        $this->last_api_call_timestamp = time(); 
    68         add_action( 'process_attachment', array( $this, 'process_attachment' ), 10, 3 ); 
     68        add_action( 'process_attachment', array( $this, 'process_attachment' ), 10, 2 ); 
    6969        add_action( 'posterous_handle_bad_response', array( $this, 'handle_bad_response' ), 10, 2 ); 
    7070        add_action( 'posterous_api_sleep', array( $this, 'api_sleep' ) ); 
     
    364364                } 
    365365 
     366                foreach ( $entry->media->images as $image ) { 
     367                    $file = new stdClass(); 
     368                    $file->type = 'image'; 
     369                    $file->url = $image->full->url; 
     370                    $file->resized = array( $image->scaled500->url, $image->scaled1000->url ); 
     371                    $media[] = $file; 
     372                } 
     373 
    366374                if ( 0 < count( $media ) ) 
    367375                    add_post_meta( $post_id, 'posterous_' . $this->bid . '_media', $media, true ); 
     
    504512            $media = array_pop( get_post_meta( $post_id, 'posterous_' . $this->bid . '_media' ) ); 
    505513 
    506             printf( "<em>%s</em>", __( 'Checking' ) . " '$post->post_title' " . __( 'for images...' ) ); 
    507             $attachments = $this->extract_post_media( $post->post_content, $media ); 
    508             printf( "<em>%s</em><br />\n", ' ' . sizeof( $attachments['fullsize'] ) + sizeof( $attachments['single'] ) . ' ' . __( 'images found' ) ); 
    509  
    510             // Process attachments 
    511             if ( !empty( $attachments['fullsize'] ) ) { 
    512                 do_action( 'process_attachment', $post, $attachments['fullsize'], $attachments['thumb'] ); 
    513             } 
    514  
    515             /* Process elements in the $single array only if they haven't already been processed 
    516              * for the fullsize array. The need to do this is a byproduct of our dealing 
    517              * with Posterous's dimension scaling hack. 
    518              */ 
    519             $single = array_diff( $attachments['single'], $attachments['fullsize'] ); 
    520             if ( !empty( $single ) ) { 
    521                 do_action( 'process_attachment', $post, $attachments['single'], $attachments['thumb'] ); 
    522             } 
    523  
    524             if ( !empty( $attachments['single'] ) ) { 
    525                 do_action( 'process_attachment', $post, $attachments['single'], $attachments['single'] ); 
    526             } 
    527  
     514            printf( "<em>%s</em>", __( 'Checking' ) . " '$post->post_title' " . __( 'for media...' ) ); 
     515            $attachments = $this->extract_post_media( $media ); 
     516            printf( "<em>%s</em><br />\n", ' ' . sizeof( $attachments ) . ' ' . __( 'media files found' ) ); 
     517 
     518            foreach ( $attachments as $attachment ) { 
     519                do_action( 'process_attachment', $post, $attachment ); 
     520            }    
     521             
    528522            unset( $post, $attachments ); 
    529523 
     
    540534     * @return void 
    541535     */ 
    542     function process_attachment( $post, $fullsizes, $thumbs ) { 
    543         if ( empty( $fullsizes ) ) 
    544             return; 
    545  
    546         foreach ( $fullsizes as $id => $fullsize ) { 
    547             if( $this->is_user_over_quota() ) 
    548                 return false; 
    549  
    550             /** 
    551              * Check to see if we've already fetched the path for this image. Posterous 
    552              * serves files from different hostnames, so if we don't check by path only, 
    553              * we can download multiple times. Since we still want to do thumbs and 
    554              * do a url remapping later for the $fullpath url (which may be referenced in 
    555              * page source), do those bits even if we've already seen this $fullsize_path but then bail. 
    556              */ 
    557             $fullsize_path = parse_url( $fullsize, PHP_URL_PATH ); 
    558  
    559             $thumb = $thumbs[$id]; 
    560  
    561             // Skip duplicates 
    562             if ( isset( $this->attachments[$fullsize_path] ) ) { 
    563                 $post_id = $this->attachments[$fullsize]; 
    564                 printf( "<em>%s</em><br />\n", __( 'Skipping duplicate' ) . ' ' . $fullsize ); 
    565                 // Get new attachment URL 
    566                 $attachment_url = wp_get_attachment_url( $post_id ); 
    567  
    568                 // Update url_remap array 
    569                 $this->url_remap[$fullsize] = $attachment_url; 
    570                 $sized = image_downsize( $post_id, 'medium' ); 
    571                 if ( isset( $sized[0] ) ) { 
    572                     $this->url_remap[$thumb] = $sized[0]; 
    573                 } 
    574  
    575                 continue; 
    576             } 
    577  
    578             echo '<em>Importing attachment ' . htmlspecialchars( $fullsize ) . "...</em>"; 
    579             $upload = $this->fetch_remote_file( $post, $fullsize ); 
    580  
    581             if ( is_wp_error( $upload ) ) { 
    582                 printf( "<em>%s</em><br />\n", __( 'Remote file error:' ) . ' ' . htmlspecialchars( $upload->get_error_message() ) ); 
    583                 continue; 
    584             } else { 
    585                 printf( "<em> (%s)</em><br />\n", size_format( filesize( $upload['file'] ) ) ); 
    586             } 
    587  
    588             if ( 0 == filesize( $upload['file'] ) ) { 
    589                 print __( "Zero length file, deleting..." ) . "<br />\n"; 
    590                 @unlink( $upload['file'] ); 
    591                 continue; 
    592             } 
    593  
    594             $info = wp_check_filetype( $upload['file'] ); 
    595             if ( false === $info['ext'] ) { 
    596                 printf( "<em>%s</em><br />\n", $upload['file'] . __( 'has an invalid file type') ); 
    597                 @unlink( $upload['file'] ); 
    598                 continue; 
    599             } 
    600  
    601             // as per wp-admin/includes/upload.php 
    602             $attachment = array ( 
    603                 'post_title' => $post->post_title, 
    604                 'post_content' => '', 
    605                 'post_status' => 'inherit', 
    606                 'guid' => $upload['url'], 
    607                 'post_mime_type' => $info['type'] 
    608                 ); 
    609  
    610             $post_id = (int) wp_insert_attachment( $attachment, $upload['file'], $post->ID ); 
    611             $attachment_meta = @wp_generate_attachment_metadata( $post_id, $upload['file'] ); 
    612             wp_update_attachment_metadata( $post_id, $attachment_meta ); 
    613  
    614             // Fire an action to do anything we might like to do to the post after adding an attachment (e.g. inserting shortcodes). 
    615             // This is not implemented within the plugin; it's just here so that it can be extended. 
    616             do_action( 'posterous_process_attachment_post_update', $post, $post_id, $fullsize, $media_types ); 
    617  
    618             // Add remote_url to post_meta 
    619             add_post_meta( $post_id, 'posterous_' . $this->bid . '_attachment', $fullsize, true ); 
    620             // Add remote_url to hash table 
    621             $this->attachments[$fullsize] = $post_id; 
    622  
     536    function process_attachment( $post, $attachment ) { 
     537 
     538        if( $this->is_user_over_quota() ) 
     539            return false; 
     540 
     541        /** 
     542         * Check to see if we've already fetched the path for this image. Posterous 
     543         * serves files from different hostnames, so if we don't check by path only, 
     544         * we can download multiple times. Since we still want to do thumbs and 
     545         * do a url remapping later for the $fullpath url (which may be referenced in 
     546         * page source), do those bits even if we've already seen this $fullsize_path but then bail. 
     547         */ 
     548 
     549        // Skip duplicates 
     550        if ( isset( $this->attachments[ $attachment->url ] ) ) { 
     551            $post_id = $this->attachments[ $attachment->url ]; 
     552            printf( "<em>%s</em><br />\n", __( 'Skipping duplicate' ) . ' ' . $attachment->url ); 
    623553            // Get new attachment URL 
    624554            $attachment_url = wp_get_attachment_url( $post_id ); 
     555 
    625556            // Update url_remap array 
    626             $this->url_remap[$fullsize] = $attachment_url; 
     557            $this->url_remap[ $attachment->url ] = $attachment_url; 
    627558            $sized = image_downsize( $post_id, 'medium' ); 
    628             if ( isset( $sized[0] ) ) { 
    629                 $this->url_remap[$thumb] = $sized[0]; 
     559            foreach ( $attachment->thumbs as $thumb ) { 
     560                if ( isset( $sized[0] ) ) { 
     561                    $this->url_remap[ $thumb ] = preg_match( '/scaled\.?500\./', $thumb ) ? $sized[0] : $attachment->url; 
     562                } 
     563            } 
     564 
     565            continue; 
     566        } 
     567 
     568        echo '<em>Importing attachment ' . htmlspecialchars( $attachment->url ) . "...</em>"; 
     569        $upload = $this->fetch_remote_file( $post, $attachment->url ); 
     570 
     571        if ( is_wp_error( $upload ) ) { 
     572            printf( "<em>%s</em><br />\n", __( 'Remote file error:' ) . ' ' . htmlspecialchars( $upload->get_error_message() ) ); 
     573            continue; 
     574        } else { 
     575            printf( "<em> (%s)</em><br />\n", size_format( filesize( $upload['file'] ) ) ); 
     576        } 
     577 
     578        if ( 0 == filesize( $upload['file'] ) ) { 
     579            print __( "Zero length file, deleting..." ) . "<br />\n"; 
     580            @unlink( $upload['file'] ); 
     581            continue; 
     582        } 
     583 
     584        $info = wp_check_filetype( $upload['file'] ); 
     585        if ( false === $info['ext'] ) { 
     586            printf( "<em>%s</em><br />\n", $upload['file'] . __( 'has an invalid file type') ); 
     587            @unlink( $upload['file'] ); 
     588            continue; 
     589        } 
     590 
     591        // as per wp-admin/includes/upload.php 
     592        $attachment_post = array ( 
     593            'post_title' => $post->post_title, 
     594            'post_content' => '', 
     595            'post_status' => 'inherit', 
     596            'guid' => $upload['url'], 
     597            'post_mime_type' => $info['type'] 
     598        ); 
     599 
     600        $post_id = (int) wp_insert_attachment( $attachment_post, $upload['file'], $post->ID ); 
     601        $attachment_meta = @wp_generate_attachment_metadata( $post_id, $upload['file'] ); 
     602        wp_update_attachment_metadata( $post_id, $attachment_meta ); 
     603 
     604        // Fire an action to do anything we might like to do to the post after adding an attachment (e.g. inserting shortcodes). 
     605        // This is not implemented within the plugin; it's just here so that it can be extended. 
     606        do_action( 'posterous_process_attachment_post_update', $post, $post_id, $attachment->url, $media_types ); 
     607 
     608        // Add remote_url to post_meta 
     609        add_post_meta( $post_id, 'posterous_' . $this->bid . '_attachment', $attachment->url, true ); 
     610        // Add remote_url to hash table 
     611        $this->attachments[ $attachment->url] = $post_id; 
     612 
     613        // Get new attachment URL 
     614        $attachment_url = wp_get_attachment_url( $post_id ); 
     615        // Update url_remap array 
     616        $this->url_remap[ $attachment->url ] = $attachment_url; 
     617        $sized = image_downsize( $post_id, 'medium' ); 
     618        if ( isset( $attachment->thumbs ) ) { 
     619            foreach ( $attachment->thumbs as $thumb ) { 
     620                if ( isset( $sized[0] ) ) { 
     621                    $this->url_remap[ $thumb ] = preg_match( '/scaled\.?500\./', $thumb ) ? $sized[0] : $attachment->url; 
     622                } 
    630623            } 
    631624        } 
     
    724717     * @return array 
    725718     */ 
    726     function extract_post_media( $post_content, $media ) { 
    727         $post_content = stripslashes( $post_content ); 
    728         $post_content = str_replace( "\n", '', $post_content ); 
    729         $post_content = $this->min_whitespace( $post_content ); 
     719    function extract_post_media( $media ) { 
    730720        $attachments = array(); 
    731         $attachments['thumb'] = array(); 
    732         $attachments['fullsize'] = array(); 
    733         $attachments['single'] = array(); 
    734  
    735         // Find all linked images 
    736         $matches = array(); 
    737         preg_match_all( '|<a.*?href=[\'"](.*?)[\'"].*?><img.*?src=[\'"](.*?)[\'"].*?>|i', $post_content, $matches ); 
    738  
    739         foreach ( $matches[1] as $i => $url ) { 
    740             if ( strstr( $url, 'posterous.com' ) ) { 
    741                 $attachments['thumb'][$i] = $matches[2][$i]; 
    742                 $attachments['fullsize'][$i] = $this->remove_scaling_from_image_url( $url ); 
    743             } 
    744         } 
    745  
    746         // Find all not linked images 
    747         $matches = array(); 
    748         preg_match_all( '|<img.*?src=[\'"](.*?)[\'"].*?>|i', $post_content, $matches ); 
    749         foreach ( $matches[1] as $i => $url ) { 
    750             $url = $this->remove_scaling_from_image_url( $url ); 
    751             if ( strstr( $url, 'posterous.com' ) && !in_array( $url, $attachments['thumb'] ) && !in_array( $url, $attachments['fullsize'] ) ) { 
    752                 $attachments['single'][$i] = $url; 
    753             } 
    754         } 
    755         $attachments['single'] = array_unique( $attachments['single'] ); 
    756         if ( is_array( $media ) ) { 
    757             foreach ( $media as $m ) { 
    758                 $attachments['fullsize'][] = $m->url; 
    759             } 
    760         } 
    761         unset( $post_content, $matches ); 
     721        foreach ( $media as $file ) { 
     722            if ( 'image' == $file->type ) { 
     723                $obj = new stdClass(); 
     724                $obj->url = $file->url; 
     725                $obj->thumbs = array(); 
     726                foreach ( $file->resized as $resized ) { 
     727                    $obj->thumbs[] = $resized; 
     728                } 
     729                $attachments[] = $obj; 
     730            } else { 
     731                $obj = new stdClass(); 
     732                $obj->url = $file->url; 
     733                $attachments[] = $obj; 
     734            } 
     735             
     736        } 
    762737        return $attachments; 
    763738    } 
Note: See TracChangeset for help on using the changeset viewer.