/**
 * An implementation of the Boyer-Moore string search algorithm.  
 *
 * There are quite a few tricky corner cases when implementing this code.  
 * Here are the unit tests I used: 
 *
 *    "black cat" "black cat"                 --> 0
 *    "black cat" "ac"                        --> 2
 *    "black cat" "a"                         --> 2, 7
 *    "black cat" "b"                         --> 0
 *    "black cat" "t"                         --> 8
 *    "black cat" "cat"                       --> 6
 *    "black cat" " "                         --> 5
 *    "black cat" ""                          --> no match
 *    "" ""                                   --> no match
 *    "" "a"                                  --> no match
 *    "black cat" "pattern longer than text"  --> no match
 *
 *
 * - Niek Sanders
 *   http://www.cis.rit.edu/~njs8030/
 *
 */
#include <cassert>
#include <iostream>
#include <map>
#include <string>
#include <vector>


/**
 * Find all instances of a pattern using the Boyer-Moore algorithm.
 *
 * The degenerate cases of having an empty text or empty pattern are handled by
 * indicating that nothing matches.
 *
 * \param    text      Text through which to search.
 * \param    pattern   Pattern to search for.
 *
 * \param    starting index for each location where pattern occurs in text.
 */
std::vector<std::size_t> 
searchBoyerMoore( const std::string& text, const std::string& pattern ) {

    std::vector<std::size_t> matchIndices;

    // bail early for degenerate cases
    if ( text.empty() || pattern.empty() ) {
        return matchIndices;
    }

    // slideback table is used when mismatched characters occur in pattern 
    //
    // Entries exist for all characters in pattern.  For a given entry, the
    // value indicates how far the last occuring instance of that entry is from
    // the end of the pattern.
    // 
    std::map<char,std::size_t>  slidebackTable; 
    for ( std::size_t i = 0; i < pattern.size(); i++ ) {
        slidebackTable[ pattern.at(i) ] = pattern.size() - i - 1;
    }
    
    // march through text if sufficient room for pattern
    int textPos = 0;
    while ( (textPos + pattern.size()) <= text.size() ) {

        // begin matching pattern for this text position
        for ( int p = pattern.size() - 1; p >= 0; p-- ) {

            char curText = text.at( textPos + p );
            char curPat  = pattern.at( p );

            // mismatch
            if ( curText != curPat ) {

                // ideally can slide forward whole matched pattern length
                int slideDist = p + 1;

                // if mismatch in pattern, must slideback a bit
                if ( slidebackTable.count( curText ) != 0 ) {
                    slideDist = slidebackTable[ curText ];
                }
                assert( slideDist > 0 );

                // leave pattern match, try next candidate text position
                textPos += slideDist;
                break;
            }

            // matched all characters
            if ( p == 0 ) {
                matchIndices.push_back( textPos );
                textPos += 1;
            }
        }
    }

    return matchIndices;
}


int main( int argc, char* argv[] ) {

    // check/extract cmdline arguments
    if ( argc != 3 ) {
        std::cerr << "usage: ./a.out <text> <pattern>" << std::endl;
        return 1;
    }
    std::string text(    argv[1] );
    std::string pattern( argv[2] );

    // use Boyer-Moore to search
    std::vector<std::size_t> matches = searchBoyerMoore( text, pattern );

    // dump results
    for ( std::size_t i = 0; i < matches.size(); i++ ) {
        std::cout << "match at " << matches.at(i) << std::endl;
    }

    // warn if no matches
    if ( matches.empty() ) {
        std::cout << "no matches" << std::endl;
    }

    std::cout << std::endl;

    return 0;
}
