Ash C++ Template Library (AshTL) Documentation

/********************************************************************************

    This file is part of the ASH Template Library (AshTL)
    <http://ashtl.sourceforge.net>

    Copyright (c) 2010-2012  Adam Martinson (Loki)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.

 ********************************************************************************/

/*
 * Copyright (c) 2010-2012  Adam Martinson (Loki)
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *     * Redistributions of source code must retain the above copyright notice, this
 *       list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright notice,
 *       this list of conditions and the following disclaimer in the documentation
 *       and/or other materials provided with the distribution.
 *     * Neither the name of the copyright holder nor the name(s) of the author(s)
 *       may be used to endorse or promote products derived from this software without
 *       specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


#include <cstdlib>
#include <cstdio>
#include <cmath>
#include <cstring>
#include <iostream>
#include <set>
#include <queue>
#include <omp.h>

#include "ash/random.h"
#include "ash/dense_hashgrid.h"
//#include "ash/sparse_hashgrid.h"
//#include "ash/dense_hashmatrix.h"
//#include "ash/sparse_hashmatrix.h"
#include "ash/type_traits.h"
#include "ash/meta_morton.h"

#if _ASH_CC == _ASH_CC_MS
    #include <Windows.h>
#endif
#include <GL/gl.h>
#include <GL/glut.h>
//get rid of some anachronism warnings
extern "C" {
    typedef void(*glutDisplayFunc_cb)(void);
    typedef void(*glutIdleFunc_cb)(void);
    typedef void(*glutTimerFunc_cb)(int);
}

#define NUM_THREADS 8
#define CELL_PIXELS 3
#define DIST_APART 0.5
#define MOVE_SPEED 0.125 //(1.0 - DIST_APART) //MAX
#define COLOR_DIFF 0.25
#define DIE_DIST 1.0
#define FPS_REFRESH 3000
#ifdef UINT
    #undef UINT
#endif
#define UINT uint16

#define DEBUG(...)
//#define DEBUG debug

using namespace ash;
using std::cout;
using std::endl;

/* Look Ma, no locks!  (Lies, OMP handles it though)
 *
 * This is the simplest example of a case where hashgrids shine.
 * Take a MMOG server.  You have a world with a bunch of players running around
 * interacting with eachother, in ways that may or may not be legal according to
 * the mechanics.  For example, Player A is trying to cast Magic Missile at
 * Player B, but Player B is running away and may be out of range.  The server
 * needs to determine whether or not Player B gets hit.  Player A and Player B
 * are both Independent Agents.  The range of Magic Missile is the Event Range.
 * The Maximum Event Range for the system is the maximum distance at which 2
 * Independent Agents can affect one another.  If Player A and Player B are more
 * than the Maximum Event Range apart, what one is trying to do is irrelevant to
 * what the other is trying to do, and Events generated by one may be processed
 * in parallel with events generated by the other. Hashgrids provide a built-in
 * solution to the problem of determining which events may be processed in
 * parallel:
 * <http://ashtl.sourceforge.net/morton_overview.html>
 * <http://ashtl.sourceforge.net/morton_concurrency.html>
 *
 * In the following example, we have a simple 2D world full of Independent
 * Agents.  There is only 1 type of Event: movement.  Each Agent may move
 * anywhere within (+/-MOVE_SPEED, +/-MOVE_SPEED) of their current location each
 * tick.  The only rule is, they may not come within DIST_APART of another
 * Agent.  However, these are not smart Agents - not in this Matrix.  They
 * simply choose a random vector and try to move.  As a bonus feature, when
 * agents get within DIE_DIST of another agent with a color within COLOR_DIFF of
 * their own color, we kill both of them.
 */

template <class HashMatrix, uint8 Offset>
struct UpdateQueue
{
    typedef HashMatrix matrix_type;
    typedef typename matrix_type::morton_type morton_type;
    typedef typename matrix_type::size_type size_type;

    typedef typename std::set<morton_type, std::less<morton_type>, libc_allocator_with_realloc<morton_type> > queue_type;

    static uint8 mask(morton_type const& pos) {
        return (_mask & pos) >> _offset;
    }

    bool schedule(morton_type const& pos) {
        uint8 i = mask(pos);
        assert(i < queue_size);
        return queue[i].insert(pos).second;
    }

    void get(queue_type& q, uint8 i) {
        assert(i < queue_size);
        if (q.size())
            q.clear();

        q.swap(queue[i]);
    }

    static const uint8 queue_size = m_pow<uint8, 4, matrix_type::D>::value;

private:
    queue_type queue[queue_size];

    static const size_type _offset = Offset * matrix_type::D;
    static const size_type _mask = (static_cast<size_type>(-1) >>
                                    (bitsizeof(size_type) - (matrix_type::D*2))) << _offset;
    
};


static Random<GLfloat> rand_color (0.25, 1.0);
struct Agent
{
    explicit Agent() : pos(0,0), dead(false) {
        color[0] = rand_color();
        color[1] = rand_color();
        color[2] = rand_color();
        #pragma omp atomic
        ++count;
    }
    explicit Agent(PointXY p) : pos(p), dead(false) {
        color[0] = rand_color();
        color[1] = rand_color();
        color[2] = rand_color();
        #pragma omp atomic
        ++count;
    }
    ~Agent()
    {
        #pragma omp atomic
        --count;
    }

    double dist(PointXY p) const {
        return sqrt(pow(pos.x - p.x, 2) + pow(pos.y - p.y, 2));
    }

    double color_diff(Agent const& a) const {
        return sqrt(pow(color[0] - a.color[0], 2) +
                    pow(color[1] - a.color[1], 2) +
                    pow(color[2] - a.color[2], 2));
    }

    PointXY pos;
    GLfloat color[3];
    bool    dead;
    static UINT count;
};
UINT Agent::count = 0;

struct Event
{
    Event(Agent* a, PointXY p) : agent(a), pos(p) {
        assert(agent != NULL);
    }

    Agent* agent;
    PointXY pos;
};

template <class Coord>
struct Cell
{

    typedef typename std::queue<Event> queue_type;
    typedef std::set<Agent*> set_type;

    Cell() : coord(), events(), agents(), dirty(false), locked(false) { }
    explicit Cell(Coord c) : coord(c), events(), agents(), dirty(false), locked(false) { }

    void queue_events() {
        static Random<double> rand_gen(-MOVE_SPEED, MOVE_SPEED);
        assert(!locked);
        locked = true;
        //generate a random movement for each agent
        //each agent does whatever it wants, validity of events is checked during update
        for (typename set_type::iterator itr = agents.begin(); itr != agents.end(); ++itr)
        {
            if (!(*itr)->dead)
            {
                PointXY p ((*itr)->pos.x + rand_gen(), (*itr)->pos.y + rand_gen());
                assert(fabs((*itr)->pos.x - p.x) <= MOVE_SPEED && fabs((*itr)->pos.y - p.y) <= MOVE_SPEED);
                assert(*itr != NULL);
                Event e (*itr, p);
                assert(e.agent != NULL);
                events.push(e);
            }
        }
        assert(locked);
        locked = false;
    }

    void cleanup() {
        assert (!locked);
        locked = true;
        assert(!events.size());
        assert(agents.size());
        assert(dirty);
        typename set_type::iterator itr = agents.begin();
        while (itr != agents.end())
        {
            assert(*itr);
            if ((*itr)->dead)
            {
                Agent* a = *itr;
                agents.erase(itr++);
                delete a;
            }
            else
            {
                ++itr;
            }
        }
        dirty = false;
        assert(locked);
        locked = false;
    }

    Coord coord;
    queue_type events;
    set_type agents;
    bool dirty;
    mutable volatile bool locked; //not a real lock, just used for assertions
};

typedef PCoordXY<UINT> Coord;
//typedef dense_mortongrid<Coord, Cell<Coord> > Map;
typedef dense_hashgrid<Coord, Cell<Coord> > Map;
//typedef sparse_hashgrid<Coord, Cell<Coord> > Map;
//typedef dense_hashmatrix<Coord, Cell<Coord> > Map;
//typedef sparse_hashmatrix<Coord, Cell<Coord> > Map;
typedef UpdateQueue<Map, 0> Updates;

static void update_cell(Map& map, UINT m) {
    Cell<Coord>* cell = &map[m];
    assert(!cell->locked);
    cell->locked = true;

    for (Cell<Coord>::set_type::iterator itr = cell->agents.begin();
        itr != cell->agents.end(); ++itr) {
        for (Map::range_iterator ritr = map.range_begin(cell->coord, 1);
            ritr != map.end(); ++ritr) {
            if (&ritr->second != cell)
            {
                assert(!ritr->second.locked);
                ritr->second.locked = true;
            }
            for (Cell<Coord>::set_type::iterator oitr = ritr->second.agents.begin();
                oitr != ritr->second.agents.end(); ++oitr) {
                if (*oitr == *itr)
                    continue;
                if ((*itr)->dist((*oitr)->pos) < DIE_DIST &&
                    (*itr)->color_diff(**oitr) < COLOR_DIFF)
                {
                    (*itr)->dead = true;
                    cell->dirty = true;
                    (*oitr)->dead = true;
                    if (&ritr->second != cell)
                        ritr->second.dirty = true;
                }
            }
            if (&ritr->second != cell)
            {
                assert(ritr->second.locked);
                ritr->second.locked = false;
            }
        }
    }

    while(cell->events.size()) {
        assert(cell->events.front().agent != NULL);
        Event e = cell->events.front();
        bool bad = e.agent->dead;

        if (!bad) {
            //check if the new position would be off the map
            bad = e.pos.x < 0.0 || e.pos.y < 0.0 ||
                    static_cast<Map::size_type>(e.pos.x) >= map.pcoord_max.x ||
                    static_cast<Map::size_type>(e.pos.y) >= map.pcoord_max.y;
        }

        //check if the new position would be too close to other agents
        for (Map::const_range_iterator ritr = map.range_begin(cell->coord, 1);
                ritr != map.end() && !bad; ++ritr) {
            if (&ritr->second != cell)
            {
                assert(!ritr->second.locked);
                ritr->second.locked = true;
            }
            for (Cell<Coord>::set_type::const_iterator oitr = ritr->second.agents.begin();
                    oitr != ritr->second.agents.end() && !bad; ++oitr) {
                if (*oitr != e.agent && (*oitr)->dist(e.pos) < DIST_APART)
                {
                    bad = true;
                }
            }
            if (&ritr->second != cell)
            {
                assert(ritr->second.locked);
                ritr->second.locked = false;
            }
        }

        //if everything checks out, process the event, otherwise ignore it
        if (!bad) {
            //check if the new position is outside the current cell
            Coord new_coord (static_cast<UINT>(e.pos.x), static_cast<UINT>(e.pos.y));
            if (cell->coord != new_coord)
            {
                assert(abs(new_coord.x - cell->coord.x) <= 1);
                assert(abs(new_coord.y - cell->coord.y) <= 1);
                cell->agents.erase(e.agent);
                assert(!map[new_coord].locked);
                map[new_coord].locked = true;
                map[new_coord].agents.insert(e.agent);
                assert(map[new_coord].locked);
                map[new_coord].locked = false;
            }
            e.agent->pos = e.pos;
        }

        cell->events.pop();
    } //end while
    assert(cell->locked);
    cell->locked = false;
}

static Map map(static_cast<UINT>(-1));
static Updates updates;
static uint64 frames = 0;
static double fps = 0.0;

static void show_fps()
{
    if (fps > 0.0)
    {
        char buf[32];
        sprintf(buf, "fps: %7.3f    agents: %5d ", fps, Agent::count);
        glColor3f(1.0, 1.0, 1.0);
        glRasterPos2i(-1, -1);
        for (uint8 i = 0; i < (sizeof(buf) - 1); ++i)
            glutBitmapCharacter(GLUT_BITMAP_9_BY_15, buf[i]);
    }
}

static void debug(char const* s, int v, bool b)
{
    #pragma omp critical
    {
        if (b)
            cout << s << " (" << v << ") {" << endl;
        else
            cout << "} " << s << " (" << v << ")" << endl;
    }
}

static void process_map()
{
    glClear(GL_COLOR_BUFFER_BIT);

    for (uint8 i = 0; i < updates.queue_size; ++i)
    {
        uint8 j = Updates::mask(i-4);
        uint8 k = Updates::mask(i-8);
        uint8 l = Updates::mask(i-12);

        #pragma omp parallel default(shared) //num_threads(4)
        {
            //draw - 1x1 access
            #pragma omp master //OpenGL contexts are thread-specific
            {
                static const size_t last = static_cast<UINT>(MortonXY<UINT>(Map::pcoord_max));
                DEBUG("draw", i, true);
                //iterating this way only works well with a full map
                //normally we'd use the STL iterators and skip the
                //cells for which the morton doesn't match
                for (size_t m = i; m <= last; m += 16)
                {
                    Map::iterator itr = map.find(m);
                    if (itr == map.end() || !itr->second.agents.size())
                        continue;

                    assert(!itr->second.locked);
                    itr->second.locked = true;
                    for (Cell<Coord>::set_type::iterator oitr = itr->second.agents.begin();
                            oitr != itr->second.agents.end(); ++oitr)
                    {
                        GLfloat x = static_cast<GLfloat>((*oitr)->pos.x * 2.0);
                        GLfloat y = static_cast<GLfloat>((*oitr)->pos.y * 2.0);
                        x /= Map::pcoord_max.x;
                        y /= Map::pcoord_max.y;
                        x -= 1.0;
                        y -= 1.0;
                        glColor3fv((*oitr)->color);
                        glBegin(GL_POINTS);
                        glVertex3f(x, y, 0);
                        glEnd();
                    }
                    assert(itr->second.locked);
                    itr->second.locked = false;
                }
                DEBUG("draw", i, false);
            }

            #pragma omp sections
            {
                //cleanup - 1x1 access
                #pragma omp section
                {
                    DEBUG("clean", j, true);
                    static const size_t last = static_cast<UINT>(MortonXY<UINT>(Map::pcoord_max));
                    //iterating this way only works well with a full map
                    //normally we'd use the STL iterators and skip the
                    //cells for which the morton doesn't match
                    //we could make this parallel, but there's no point
                    //most of the CPU time is in the 3x3 section
                    for (size_t m = j; m <= last; m += 16)
                    {
                        Map::iterator itr = map.find(m);
                        if (itr == map.end() || !itr->second.dirty)
                            continue;

                        itr->second.cleanup();
                    }
                    DEBUG("clean", j, false);
                }

                //"listener" - 1x1 access
                #pragma omp section
                {
                    static const size_t last = static_cast<UINT>(MortonXY<UINT>(Map::pcoord_max));
                    DEBUG("listen", k, true);
                    //iterating this way only works well with a full map
                    //normally we'd use the STL iterators and skip the
                    //cells for which the morton doesn't match
                    //we could make this parallel, but there's no point
                    //most of the CPU time is in the 3x3 section
                    for (size_t m = k; m <= last; m += 16)
                    {
                        Map::iterator itr = map.find(m);
                        if (itr == map.end() || !itr->second.agents.size())
                            continue;

                        itr->second.queue_events();

                        //#pragma omp critical
                        updates.schedule(itr->first);
                    }
                    DEBUG("listen", k, false);
                    //#pragma omp taskwait
                } //end omp section

                //update - 3x3 access
                #pragma omp section
                {
                    Updates::queue_type queue;
                    DEBUG("update", l, true);

                    //run updates
                    queue.clear();
                    updates.get(queue, l); //updates now stored in queue
                    
                    for (Updates::queue_type::iterator itr = queue.begin(); itr != queue.end(); ++itr)
                    {
                        UINT m = *itr;

                    #if _ASH_CC != _ASH_CC_MS
                        #pragma omp task firstprivate(m)
                    #endif
                        update_cell(map, m);
                    }
                    DEBUG("update", l, false);
                } //end omp section
            } //end omp sections
        } //end omp parallel
    }
    show_fps();
    glutSwapBuffers();
    ++frames;
}

static void calc_fps(int /*ignored*/)
{
    static uint64 prev_frames = 0;
    uint64 f = frames - prev_frames;
    prev_frames = frames;

    fps = static_cast<double>(f)/(static_cast<double>(FPS_REFRESH)/1000.0);
    glutTimerFunc(FPS_REFRESH, (glutTimerFunc_cb)calc_fps, 0);
}

int main(int argc, char** argv) {
    long n = 0;
#if _ASH_CC != _ASH_CC_MS
    int lim = omp_get_thread_limit();
    omp_sched_t sched = omp_sched_auto;
#else
    int lim = omp_get_max_threads();
#endif

    omp_set_dynamic(true);
    omp_set_nested(true);

    if (argc > 1)
        n = strtol(argv[1], NULL, 10);

    cout << "using up to ";
    if (n > 0 && n < lim)
    {
        cout << n << " threads" << endl;
        omp_set_num_threads(n);
    }
    else if (n > 0)
    {
        cout << lim << " threads (max)" << endl;
        omp_set_num_threads(lim);
    }
    else
    {
        cout << static_cast<int>(NUM_THREADS) << " threads (default)" << endl;
        omp_set_num_threads(NUM_THREADS);
    }

#if _ASH_CC != _ASH_CC_MS
    if (argc > 2)
    {
        if (!strcmp(argv[2], "static"))
            sched = omp_sched_static;
        else if (!strcmp(argv[2], "dynamic"))
            sched = omp_sched_dynamic;
        else if (!strcmp(argv[2], "guided"))
            sched = omp_sched_guided;
    }

    switch (sched)
    {
    case omp_sched_static:
        cout << "using static scheduler" << endl;
        break;
    case omp_sched_dynamic:
        cout << "using dynamic scheduler" << endl;
        break;
    case omp_sched_guided:
        cout << "using guided scheduler" << endl;
        break;
    default:
        cout << "using auto scheduler" << endl;
        break;
    }
    omp_set_schedule(sched, 0);
#endif

    //fill the map with cells & agents
    //we start with 1 agent in the middle of each cell
    cout << "building map...";

    for(UINT x = 0; x < Map::pcoord_max.x; ++x)
    {
        for (UINT y = 0; y < Map::pcoord_max.y; ++y)
        {
            Coord c (x, y);
            PointXY p (double(x) + 0.5, double(y) + 0.5);
            map[c] = Cell<Coord>(c);
            Agent* a = new Agent(p);
            map[c].agents.insert(a);
        }
    }

    cout << "done" << endl << "initializing glut" << endl;
    
    argc = 1;
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_DOUBLE);
    glutInitWindowSize(static_cast<int>(Map::pcoord_max.x) * CELL_PIXELS,
                       static_cast<int>(Map::pcoord_max.y) * CELL_PIXELS);
    glutInitWindowPosition(100, 100);
    glutCreateWindow("AshTL OpenMP example");
    glutDisplayFunc((glutDisplayFunc_cb)process_map);
    glutIdleFunc((glutIdleFunc_cb)process_map);
    glutTimerFunc(FPS_REFRESH, (glutTimerFunc_cb)calc_fps, 0);
    glClearColor(0.0, 0.0, 0.0, 1.0);
    glLoadIdentity();
    glPointSize(static_cast<GLfloat>(CELL_PIXELS)/4.0);

    cout << "entering main loop" << endl;
    glutMainLoop();

    //glutMainLoop() never returns, so we'll let the OS handle cleanup.
}