Showing posts with label broken. Show all posts
Showing posts with label broken. Show all posts

Thursday, April 10, 2014

Monitor Broken Links Using MCC Level Scripts

Note: I recommend you take a look at the official solution from the AdWords Scripts team before implementing this solution.

In case you haven't heard, MCC level scripts are finally here in beta form: AdWords scripts are now available at the MCC level. If you want access to these beta features, all you need to do is apply here and wait for the team at Google to give you access.

So what's new with MCC level scripting? The full details are at the Google Developers page, but here is a summary. You can now kick off selectors on Accounts by using the MccApp object. Filtering by stats is the same as other selectors only now you run it at the account level.  As you work through each Account, once you set the account you want to work on using MccApp.select(), everything works just like it used to.

The one new function you will probably want to take advantage of is executeInParallel() which allows you to execute the same code across up to 50 accounts at the same time. So you can kick off a reporting script to run across all of your accounts, then collect the results and send a single email or store it to a single spreadsheet. Also, scripts can now run up to 60 minutes using this method since you get 30 minutes of execution time to run the code on each account and 30 minutes to collect the results from the callback function.

To get you started with the new MccApp object, I thought I would take one of my most popular posts and rewrite it to run at the MCC level. Finding Broken Urls in your Account is a great example of how you can leverage the new executeInParallel() function to improve the monitoring of your MCC.

This script works very similarly to the previous script but has a few added features. It checks all of your Keyword and Ad urls once per day. When you install this script, you should schedule it to run hourly so that if a large account can't be processed in the allotted timeframe, the script can pick up where it left off and continue processing on the next run. It controls this internally using labels.

Also, the results of this script are stored in a new spreadsheet for each run. I had issues with the last one where the script would overwrite the values in the spreadsheet before I had a chance to look at them. This eliminates that issue.  The spreadsheet is accessed from a summary email that looks like this, with each row containing a link to the spreadsheet tab with that account's results.
An Example Email from the Script
There are a few other features, such as the ability to notify you when errors occur, to report on redirects, and to set any number of response codes that should be treated as valid.  Take it for a test drive and let me know what you think in the comments.

Also, just a quick note that I am getting back in the swing of things after getting married at the end of March so look for a much more frequent posting schedule. I am speaking at the Marketing Festival in Brno, Czech Republic at the end of October so I look forward to meeting anyone who can make it.

Thanks,
Russ

/******************************************
* Monitor Broken Links Using MCC Level Scripts
* Version 1.5
* Changelog v1.5
*   - Additional fixes from copy and paste errors
* Changelog v1.4
*   - Fixed INVALID_QUERY error
* Changelog v1.3
*   - Added previous version of report api to script until
*     I update my urls.
* Changelog v1.2
*   - Fixing INVALID_PREDICATE_ENUM_VALUE
* Changelog v1.1
*   - Stopped timeouts 
* Created By: Russ Savage
* FreeAdWordsScripts.com
******************************************/
// Name of this script; used in email subjects and in the description of the "done" label.
var SCRIPT_NAME = 'Broken Url Checker';
// Logging threshold; must be one of the keys of LOG_LEVELS ('error', 'warn', 'info', 'debug').
var LOG_LEVEL = 'error'; //change this to debug if you want more logging
// Email addresses that receive the summary email sent by sendSummaryEmail().
var NOTIFY = [''];
// Name prefix of the results spreadsheet; reportResults() appends ' - ' plus a timestamp.
var SPREADSHEET_PREFIX = 'Broken Url Details'; // A timestamp is appended 
// Email addresses that are mailed when checkUrls() catches an exception.
var NOTIFY_ON_ERROR = [''];
// When true, cleanUrl() drops the '?' and everything after it before a url is checked.
var STRIP_QUERY_STRING = true; //Drop everything after the ? in the url to speed things up
// When true, redirects are NOT followed (followRedirects is set to the inverse of this flag),
// so the redirect status code itself is what gets compared against VALID_RESPONSE_CODES.
var REPORT_ON_REDIRECTS = true; //If you want to be able to track 301s and 302, turn this on
// HTTP response codes that count as a working url; any other code is reported as broken.
var VALID_RESPONSE_CODES = [200,301,302];
// Name of the Google Drive file (one per customer id) holding the url check results of an
// unfinished run so the next run can skip urls that were already checked.
var URLS_CHECKED_FILE_NAME = 'UrlsAlreadyChecked-'+AdWordsApp.currentAccount().getCustomerId()+'.json';
// Prefix of the account label that marks a day as fully checked; checkUrls() appends the
// date formatted as yyyy-MM-dd.
var DONE_LABEL_PREFIX = 'All Urls Checked - ';
  
/**
 * Script entry point. Selects up to 50 accounts under the MCC and runs
 * checkUrls() on each of them in parallel; the collected return values are
 * handed to reportResults() once the accounts have finished.
 */
function main() {
  var accountSelector = MccApp.accounts().withLimit(50);
  accountSelector.executeInParallel('checkUrls', 'reportResults');
}
    
/**
 * Checks the final urls of every enabled keyword and ad in the currently
 * selected account and collects the ones whose response code is not listed in
 * VALID_RESPONSE_CODES. Intended to be run per account via executeInParallel().
 *
 * Progress tracking:
 *  - A label named DONE_LABEL_PREFIX + today's date marks the account as fully
 *    checked for the day; when it exists the function returns immediately.
 *  - When the remaining execution time runs low, the url results gathered so
 *    far are saved to the Drive file URLS_CHECKED_FILE_NAME so the next run
 *    does not fetch those urls again.
 *
 * @return {string} A JSON string with the keys accountId, accountName,
 *     uniqueUrlsChecked, brokenKeywords, brokenAds and didExitEarly, or ''
 *     when the account was already checked today or an exception was caught.
 */
function checkUrls() {
  try {
    debug('Processing account: '+AdWordsApp.currentAccount().getName());

    debug('Checking to see if we finished processing for today.');
    var dateStr = Utilities.formatDate(new Date(), AdWordsApp.currentAccount().getTimeZone(), 'yyyy-MM-dd');
    var finishedLabelName = DONE_LABEL_PREFIX+dateStr;
    var alreadyDone = AdWordsApp.labels().withCondition("Name = '"+finishedLabelName+"'").get().hasNext();
    if(alreadyDone) {
      info('All urls have been checked for today.');
      return '';
    }
    // Today's label does not exist, so every label with the prefix is from an
    // earlier day and can be removed.
    var labelIter = AdWordsApp.labels().withCondition("Name STARTS_WITH '"+DONE_LABEL_PREFIX+"'").get();
    while(labelIter.hasNext()) { labelIter.next().remove(); }

    debug('Checking for previous urls.');
    var urlsAlreadyChecked = readValidUrlsFromJSON();
    info('Found '+Object.keys(urlsAlreadyChecked).length+' urls already checked.');

    var toReportKeywords = [];
    var toReportAds = [];
    var didExitEarly = false;

    var keywordUrls = getKeywordUrls();
    for(var key in keywordUrls) {
      var kwRow = keywordUrls[key];
      var kwFinalUrls = kwRow.FinalUrls.split(';');
      for(var i = 0; i < kwFinalUrls.length; i++) {
        verifyUrl(kwRow,cleanUrl(kwFinalUrls[i]),urlsAlreadyChecked,toReportKeywords);
        if(shouldExitEarly()) { didExitEarly = true; break; }
      }
      // The break above only leaves the inner loop; stop walking the remaining
      // keywords as well, otherwise one more url is fetched per keyword.
      if(didExitEarly) { break; }
    }
    if(!didExitEarly) {
      var adUrls = getAdUrls();
      for(var adKey in adUrls) {
        var adRow = adUrls[adKey];
        if(adRow.CreativeFinalUrls) {
          var adFinalUrls = adRow.CreativeFinalUrls.split(';');
          for(var x = 0; x < adFinalUrls.length; x++) {
            verifyUrl(adRow,cleanUrl(adFinalUrls[x]),urlsAlreadyChecked,toReportAds);
          }
        }
        if(shouldExitEarly()) { didExitEarly = true; break; }
      }
    }
    var returnData = {
      accountId : AdWordsApp.currentAccount().getCustomerId(),
      accountName : AdWordsApp.currentAccount().getName(),
      uniqueUrlsChecked : Object.keys(urlsAlreadyChecked).length,
      brokenKeywords : toReportKeywords,
      brokenAds : toReportAds,
      didExitEarly : didExitEarly
    };
    if(didExitEarly) {
      // Keep the results so the next run can skip the urls already fetched.
      writeValidUrlsToJSON(urlsAlreadyChecked);
    } else {
      // Everything was checked: mark the day as done and reset the cache file.
      AdWordsApp.createLabel(finishedLabelName, 'Label created by '+SCRIPT_NAME, '#C0C0C0');
      writeValidUrlsToJSON({});
    }
    return JSON.stringify(returnData);
  } catch(e) {
    // This error handling helps notify you when things don't work out well.
    error(e);
    if(MailApp.getRemainingDailyQuota() >= NOTIFY_ON_ERROR.length) {
      var acctName = AdWordsApp.currentAccount().getName();
      var acctId = AdWordsApp.currentAccount().getCustomerId();
      for(var n = 0; n < NOTIFY_ON_ERROR.length; n++) {
        var recipient = NOTIFY_ON_ERROR[n];
        // Skip blank placeholders (the default config is ['']) so a failed
        // send does not throw from inside this handler and hide the real error.
        if(!recipient) { continue; }
        info('Sending mail to: '+recipient);
        MailApp.sendEmail(recipient, 'ERROR: '+SCRIPT_NAME+' - '+acctName+' - ('+acctId+')', e);
      }
    } else {
      error('Out of email quota for the day. Sending a carrier pigeon.'); 
    }
    return '';
  }

  // True once the remaining execution time reported by AdWordsApp drops below
  // 60 (presumably seconds - confirm against the getRemainingTime() docs).
  function shouldExitEarly() {
    return (AdWordsApp.getExecutionInfo().getRemainingTime() < 60);
  }

  // Looks up (or fetches and caches) the check result for url and, when the
  // url is not valid, appends a copy of row to toReport with the extra fields
  // cleanUrl and responseCode.
  function verifyUrl(row,url,urlsAlreadyChecked,toReport) {
    if(!urlsAlreadyChecked[url]) {
      info('Checking url: ' + url);
      urlsAlreadyChecked[url] = checkUrl(url);
    }
    var result = urlsAlreadyChecked[url];
    if(!result.isValid) {
      // Report a copy: one row can have several broken urls, and annotating
      // the shared row object would make every entry show the last url.
      // The JSON round trip matches how the row is serialized on return.
      var reported = JSON.parse(JSON.stringify(row));
      reported.cleanUrl = url;
      reported.responseCode = result.responseCode;
      toReport.push(reported);
    }
  }

  // Fetches url and returns { responseCode, isValid }. A fetch that throws is
  // reported with responseCode -1 and isValid false.
  function checkUrl(url) {
    var retVal = { responseCode : -1, isValid: false };
    var httpOptions = {
      muteHttpExceptions:true,
      followRedirects:(!REPORT_ON_REDIRECTS)
    };
    try {
      retVal.responseCode = UrlFetchApp.fetch(url, httpOptions).getResponseCode();
      retVal.isValid = isValidResponseCode(retVal.responseCode);
    } catch(e) {
      warn(e.message);
      //Something is wrong here, we should know about it.
      retVal.isValid = false;
    }
    return retVal;
  }

  // True when resp is one of the configured VALID_RESPONSE_CODES.
  function isValidResponseCode(resp) {
    return (VALID_RESPONSE_CODES.indexOf(resp) >= 0);
  }

  //Clean the url of query strings and valuetrack params
  function cleanUrl(url) {
    if(STRIP_QUERY_STRING) {
      if(url.indexOf('?')>=0) {
        url = url.split('?')[0];
      }
    }
    if(url.indexOf('{') >= 0) {
      //Let's remove the value track parameters
      url = url.replace(/\{[^\}]*\}/g,'');
    }
    return url;
  }

  //Use the reporting API to pull this information because it is super fast.
  //The documentation for this is here: http://goo.gl/IfMb31
  //Returns an object keyed by 'CampaignId-AdGroupId-Id' holding the report row
  //of every enabled, non-negative keyword that has FinalUrls set.
  function getKeywordUrls() {
    var OPTIONS = { includeZeroImpressions : true };
    var cols = ['CampaignId','CampaignName',
                'AdGroupId','AdGroupName',
                'Id','Criteria','KeywordMatchType',
                'IsNegative','FinalUrls','Impressions'];
    var report = 'KEYWORDS_PERFORMANCE_REPORT';
    var query = ['select',cols.join(','),'from',report,
                 'where CampaignStatus = ENABLED',
                 'and AdGroupStatus = ENABLED',
                 'and Status = ENABLED',
                 'during','LAST_7_DAYS'].join(' ');
    var results = {};
    var reportIter = AdWordsApp.report(query, OPTIONS).rows();
    while(reportIter.hasNext()) {
      var row = reportIter.next();
      if(row.IsNegative === 'true') { continue; }
      if(!row.FinalUrls) { continue; }
      // Decorate the keyword text with its match type notation. The match type
      // lives in KeywordMatchType; Criteria is the keyword text itself.
      if(row.KeywordMatchType === 'Exact') {
        row.Criteria = ['[',row.Criteria,']'].join('');
      } else if(row.KeywordMatchType === 'Phrase') {
        row.Criteria = ['"',row.Criteria,'"'].join('');
      }
      var rowKey = [row.CampaignId,row.AdGroupId,row.Id].join('-');
      results[rowKey] = row;
    }
    return results; 
  }

  //Use the reporting API to pull this information because it is super fast.
  //The documentation for this is here: http://goo.gl/8RHTBj
  //Returns an object keyed by 'CampaignId-AdGroupId-Id' holding the report row
  //of every enabled ad that has CreativeFinalUrls set.
  function getAdUrls() {
    var OPTIONS = { includeZeroImpressions : true };
    var cols = ['CampaignId','CampaignName',
                'AdGroupId','AdGroupName',
                'AdType',
                'Id','Headline','Description1','Description2','DisplayUrl',
                'CreativeFinalUrls','Impressions'];
    var report = 'AD_PERFORMANCE_REPORT';
    var query = ['select',cols.join(','),'from',report,
                 'where CampaignStatus = ENABLED',
                 'and AdGroupStatus = ENABLED',
                 'and Status = ENABLED',
                 'during','TODAY'].join(' ');
    var results = {};
    var reportIter = AdWordsApp.report(query, OPTIONS).rows();
    while(reportIter.hasNext()) {
      var row = reportIter.next();
      if(!row.CreativeFinalUrls) { continue; }
      var rowKey = [row.CampaignId,row.AdGroupId,row.Id].join('-');
      results[rowKey] = row;
    }
    return results;
  }

  //This function quickly writes the url data to a file
  //that can be loaded again for the next run
  function writeValidUrlsToJSON(toWrite) {
    var file = getFile(URLS_CHECKED_FILE_NAME,false);
    file.setContent(JSON.stringify(toWrite));
  }

  //And this loads that stored file and converts it to an object
  //(an empty object when the file has no content)
  function readValidUrlsFromJSON() {
    var file = getFile(URLS_CHECKED_FILE_NAME,false);
    var fileData = file.getBlob().getDataAsString();
    if(fileData) {
      return JSON.parse(fileData);
    } else {
      return {};
    }
  }
}
  
//This is the callback function that collects all the data from the scripts
//that were run in parallel on each account. More details can be found here:
// http://goo.gl/BvOPZo
//
//For every response with a non-empty return value (the JSON string produced
//by checkUrls) the broken urls are written to a sheet named after the account
//id in a spreadsheet created for this run, and one summary email linking to
//each sheet is sent to the NOTIFY list.
//
// responses - array of execution results; only getReturnValue() is used.
function reportResults(responses) {
  var summaryEmailData = [];
  // HH:mm:ss keeps minutes and seconds zero padded so the names are unambiguous.
  var dateTimeStr = Utilities.formatDate(new Date(), AdWordsApp.currentAccount().getTimeZone(), 'yyyy-MM-dd HH:mm:ss');
  var spreadsheetName = SPREADSHEET_PREFIX+' - '+dateTimeStr;
  for(var i = 0; i < responses.length; i++) {
    var returnValue = responses[i].getReturnValue();
    // checkUrls returns '' when there is nothing to report for the account.
    if(!returnValue) { continue; }
    var res = JSON.parse(returnValue);
    var sheetUrl = writeResultsToSpreadsheet(res,spreadsheetName);
    summaryEmailData.push({accountId:res.accountId,
                           accountName:res.accountName,
                           didExitEarly:res.didExitEarly,
                           uniqueUrlsChecked:res.uniqueUrlsChecked,
                           numBrokenKeywords:res.brokenKeywords.length,
                           numBrokenAds:res.brokenAds.length,
                           sheetUrl: sheetUrl});
  }
  if(summaryEmailData.length > 0) {
    sendSummaryEmail(summaryEmailData);
  }

  //Writes the broken keywords and ads of one account to the sheet named after
  //its account id inside the spreadsheet called name (created when missing)
  //and returns a url that points directly at that sheet.
  //Throws a string when the spreadsheet cannot be opened after 3 attempts.
  function writeResultsToSpreadsheet(res,name) {
    var file = getFile(name,true);
    var spreadsheet;
    var maxRetries = 0;
    while(maxRetries < 3) {
      try {
        spreadsheet = SpreadsheetApp.openById(file.getId());
        break;
      } catch(e) {
        maxRetries++;
        Utilities.sleep(1000);
      }
    }
    if(!spreadsheet) { throw 'Could not open file: '+name; }
    // Reuse the sheet named 'Sheet1', if present, for the first account written.
    if(spreadsheet.getSheetByName('Sheet1')) {
      spreadsheet.getSheetByName('Sheet1').setName(res.accountId);
    }
    var sheet = spreadsheet.getSheetByName(res.accountId);
    if(!sheet) {
      sheet = spreadsheet.insertSheet(res.accountId, spreadsheet.getSheets().length);
    }
    var toWrite = [['Type','Clean Url','Response Code','Campaign Name','AdGroup Name','Text','Full Url']];
    for(var k = 0; k < res.brokenKeywords.length; k++) {
      var kwRow = res.brokenKeywords[k];
      toWrite.push(['Keyword',
                    kwRow.cleanUrl,
                    kwRow.responseCode,
                    kwRow.CampaignName,
                    kwRow.AdGroupName,
                    kwRow.Criteria,
                    kwRow.FinalUrls]); 
    }
    for(var a = 0; a < res.brokenAds.length; a++) {
      var adRow = res.brokenAds[a];
      toWrite.push([adRow.AdType,
                    adRow.cleanUrl,
                    adRow.responseCode,
                    adRow.CampaignName,
                    adRow.AdGroupName,
                    (adRow.Headline) ? [adRow.Headline,adRow.Description1,adRow.Description2,adRow.DisplayUrl].join('|') : '',
                    adRow.CreativeFinalUrls]);
    }
    var lastRow = sheet.getLastRow();
    var numRows = sheet.getMaxRows();
    var missingRows = toWrite.length - (numRows - lastRow);
    if(missingRows > 0) {
      // Grow the sheet from its bottom edge. lastRow is 0 on an empty sheet,
      // which is not a valid position to insert after.
      sheet.insertRowsAfter(numRows, missingRows);
    }
    var range = sheet.getRange(lastRow+1,1,toWrite.length,toWrite[0].length);
    range.setValues(toWrite);
    // Trim the unused columns to the right of the data.
    if((sheet.getMaxColumns() - sheet.getLastColumn()) > 0) {
      sheet.deleteColumns(sheet.getLastColumn()+1, sheet.getMaxColumns() - sheet.getLastColumn());
    }
    file = DriveApp.getFileById(spreadsheet.getId());
    // Make the report viewable through the emailed link; fall back to
    // domain-wide link sharing when sharing with anyone is rejected.
    try {
      file.setSharing(DriveApp.Access.ANYONE_WITH_LINK, DriveApp.Permission.VIEW);
    } catch(e) {
      file.setSharing(DriveApp.Access.DOMAIN_WITH_LINK, DriveApp.Permission.VIEW);
    }
    //This gives you a link directly to the spreadsheet sheet.
    return (spreadsheet.getUrl() + '#gid=' + sheet.getSheetId());
  }

  //Escapes the characters that have a special meaning in HTML so that account
  //data cannot break the markup of the summary email.
  function escapeHtml(value) {
    return String(value)
      .replace(/&/g,'&amp;')
      .replace(/</g,'&lt;')
      .replace(/>/g,'&gt;')
      .replace(/"/g,'&quot;');
  }

  //This function builds the summary email and sends it to the people in
  //the NOTIFY list
  function sendSummaryEmail(summaryEmailData) {
    var subject = SCRIPT_NAME+' Summary Results';
    var body = subject;
    var htmlBody = '<html><body>'+subject;
    htmlBody += '<br/ >Should strip query strings: '+STRIP_QUERY_STRING;
    htmlBody += '<br/ >Report on redirects: '+REPORT_ON_REDIRECTS;
    htmlBody += '<br/ >Valid response codes: '+VALID_RESPONSE_CODES;
    htmlBody += '<br/ ><br/ >';
    htmlBody += '<table border="1" width="95%" style="border-collapse:collapse;">';
    htmlBody += '<tr>';
    htmlBody += '<td align="left"><b>Acct Id</b></td>';
    htmlBody += '<td align="left"><b>Acct Name</b></td>';
    htmlBody += '<td align="left"><b>Exited Early</b></td>';
    htmlBody += '<td align="center"><b>Unique Urls Checked</b></td>';
    htmlBody += '<td align="center"><b># Broken Keyword Urls</b></td>';
    htmlBody += '<td align="center"><b># Broken Ad Urls</b></td>';
    htmlBody += '<td align="center"><b>Full Report</b></td>';
    htmlBody += '</tr>';
    for(var r = 0; r < summaryEmailData.length; r++) {
      var row = summaryEmailData[r];
      htmlBody += '<tr><td align="left">'+ escapeHtml(row.accountId) +
                 '</td><td align="left">' + escapeHtml(row.accountName) + 
                 '</td><td align="left">' + row.didExitEarly + 
                 '</td><td align="center">' + row.uniqueUrlsChecked + 
                 '</td><td align="center">' + row.numBrokenKeywords + 
                 '</td><td align="center">' + row.numBrokenAds + 
                 '</td><td align="left"><a href="'+row.sheetUrl+'">' + 'Show Details' + 
                 '</a></td></tr>';
    }
    htmlBody += '</table>';
    htmlBody += '<br/ >';
    htmlBody += Utilities.formatDate(new Date(),AdWordsApp.currentAccount().getTimeZone(),'MMMM dd, yyyy @ hh:mma z');
    htmlBody += '.  Completed. '+summaryEmailData.length+' Accounts checked.';
    htmlBody += '</body></html>';
    var options = { htmlBody : htmlBody };
    for(var n = 0; n < NOTIFY.length; n++) {
      // Skip blank placeholders (the default config is ['']) so the remaining
      // recipients still get their email.
      if(!NOTIFY[n]) { continue; }
      MailApp.sendEmail(NOTIFY[n], subject, body, options);
    }
  }
}
  
//Looks up a file on Google Drive by name and returns the first match.
//When no file has that name a new one is created and returned instead:
//a spreadsheet when isSpreadsheet is set, otherwise an empty text file.
//Each failed attempt is followed by a one second pause; after three failed
//attempts the collected errors are thrown as a single '. ' separated string.
function getFile(fileName,isSpreadsheet) {
  var MAX_ATTEMPTS = 3;
  var failures = [];
  for(var attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
    try {
      var matches = DriveApp.getFilesByName(fileName);
      if(matches.hasNext()) {
        return matches.next();
      }
      info('Could not find file: '+fileName+' on Google Drive. Creating new file.');
      return isSpreadsheet ? SpreadsheetApp.create(fileName)
                           : DriveApp.createFile(fileName,'');
    } catch(e) {
      failures.push(e);
      Utilities.sleep(1000);
    }
  }
  // Only reached when every attempt threw.
  throw failures.join('. ');
}
  
//Logging helpers. Each level has a rank; a message is written only when the
//rank of its level does not exceed the rank of the configured LOG_LEVEL.
//An unrecognised LOG_LEVEL therefore silences every helper.
var LOG_LEVELS = { 'error':1, 'warn':2, 'info':3, 'debug':4 };

//Logs msg with the ERROR tag.
function error(msg) {
  if(LOG_LEVELS[LOG_LEVEL] >= LOG_LEVELS.error) {
    log('ERROR',msg);
  }
}

//Logs msg with the WARN tag.
function warn(msg) {
  if(LOG_LEVELS[LOG_LEVEL] >= LOG_LEVELS.warn) {
    log('WARN',msg);
  }
}

//Logs msg with the INFO tag.
function info(msg) {
  if(LOG_LEVELS[LOG_LEVEL] >= LOG_LEVELS.info) {
    log('INFO',msg);
  }
}

//Logs msg with the DEBUG tag.
function debug(msg) {
  if(LOG_LEVELS[LOG_LEVEL] >= LOG_LEVELS.debug) {
    log('DEBUG',msg);
  }
}

//Writes one line of the form '<type> - <msg>' to the script log.
function log(type,msg) {
  var line = type + ' - ' + msg;
  Logger.log(line);
}