Changeset 162:e4e7373b7c14

Show
Ignore:
Timestamp:
03/21/2012 01:47:32 PM (14 months ago)
Author:
Andreas Schaefer <gentryx@…>
Branch:
default
Message:
  • refactored CanvasCell interface so that it works with both CPU and GPU simulators,
  • GPUSimulator works, for now
Location:
src/examples/flowingcanvas
Files:
5 modified
1 moved

Legend:

Unmodified
Added
Removed
  • src/examples/flowingcanvas/CMakeLists.txt

    r158 r162  
    2121include_directories(./) 
    2222 
    23 add_executable(flowingcanvas ${SOURCES} ${MY_MOC_SOURCES}) 
    24 target_link_libraries(flowingcanvas ${LOCAL_LIBGEODECOMP_LINK_LIB}) 
     23set(CUDA_TOOLKIT_ROOT_DIR "/opt/cuda") 
     24find_package(CUDA) 
     25find_package(Qt4) 
     26 
     27if (CUDA_FOUND) 
     28  cuda_add_executable(flowingcanvas ${SOURCES} ${MY_MOC_SOURCES}) 
     29  target_link_libraries(flowingcanvas ${LOCAL_LIBGEODECOMP_LINK_LIB}) 
     30endif(CUDA_FOUND) 
     31 
  • src/examples/flowingcanvas/canvascell.h

    r159 r162  
    44#include <libgeodecomp/misc/floatcoord.h> 
    55#include <libgeodecomp/misc/topologies.h> 
     6 
     7#ifndef __host__ 
     8#define __host__ 
     9#endif 
     10 
     11#ifndef __device__ 
     12#define __device__ 
     13#endif 
    614 
    715namespace LibGeoDecomp { 
     
    6169    } 
    6270 
    63     template<typename COORD_MAP> 
    64     void update(const COORD_MAP& hood, const unsigned& nanoStep) 
    65     { 
    66         const CanvasCell& oldSelf = hood[Coord<2>()]; 
     71    // fixme: faster conversion if moore-neighborhood is used (instead of von neumann)? 
     72    __host__ __device__ 
     73    void update(const CanvasCell *up, const CanvasCell *same, const CanvasCell *down, const unsigned& nanoStep) 
     74    { 
     75        const CanvasCell& oldSelf = *same; 
    6776 
    6877        pos[0] = oldSelf.pos[0]; 
     
    7887        // fixme: move particles to other cells 
    7988        // fixme: kill dead particles 
    80         if (numParticles < 1) { 
    81             particles[numParticles] = Particle(pos[0], pos[1]); 
    82             numParticles = 1; 
    83         } 
     89        // if (numParticles < 1) { 
     90            // particles[numParticles] = Particle(pos[0], pos[1]); 
     91            // numParticles = 1; 
     92        // } 
    8493 
    8594        forceSet = oldSelf.forceSet; 
     
    8897            forceFixed[1] = oldSelf.forceFixed[1]; 
    8998        } else { 
    90             forceFixed[0] = (hood[Coord<2>(0, -1)].forceFixed[0] + 
    91                              hood[Coord<2>(-1, 0)].forceFixed[0] + 
    92                              hood[Coord<2>(1,  0)].forceFixed[0] + 
    93                              hood[Coord<2>(0,  1)].forceFixed[0]) * 0.25; 
    94             forceFixed[1] = (hood[Coord<2>(0, -1)].forceFixed[1] + 
    95                              hood[Coord<2>(-1, 0)].forceFixed[1] + 
    96                              hood[Coord<2>(1,  0)].forceFixed[1] + 
    97                              hood[Coord<2>(0,  1)].forceFixed[1]) * 0.25; 
    98         } 
    99  
    100         cameraLevel = (hood[Coord<2>(0, -1)].cameraLevel + 
    101                              hood[Coord<2>(-1, 0)].cameraLevel + 
    102                              hood[Coord<2>(1,  0)].cameraLevel + 
    103                              hood[Coord<2>(0,  1)].cameraLevel) * 0.25; 
    104          
    105         float gradientX = hood[Coord<2>(1, 0)].cameraLevel - hood[Coord<2>(-1, 0)].cameraLevel; 
    106         float gradientY = hood[Coord<2>(0, 1)].cameraLevel - hood[Coord<2>(0, -1)].cameraLevel; 
     99            forceFixed[0] = (up[0].forceFixed[0] + 
     100                             same[-1].forceFixed[0] + 
     101                             same[1].forceFixed[0] + 
     102                             down[0].forceFixed[0]) * 0.25; 
     103            forceFixed[1] = (up[0].forceFixed[1] + 
     104                             same[-1].forceFixed[1] + 
     105                             same[1].forceFixed[1] + 
     106                             down[0].forceFixed[1]) * 0.25; 
     107        } 
     108 
     109        cameraLevel = (up[0].cameraLevel + 
     110                             same[-1].cameraLevel + 
     111                             same[1].cameraLevel + 
     112                             down[0].cameraLevel) * 0.25; 
     113         
     114        float gradientX = same[1].cameraLevel - same[-1].cameraLevel; 
     115        float gradientY = down[0].cameraLevel - up[0].cameraLevel; 
    107116        forceVario[0] = 0; 
    108117        if ((gradientX > 0.011) || (gradientX < -0.011)) { 
     
    122131 
    123132        for (int i = 0; i < numParticles; ++i) { 
    124             Particle& p = particles[i]; 
     133            // Particle& p = particles[i]; 
    125134            // fixme: parameters 
    126             p.update(1.0, forceTotal[0], forceTotal[1], 1.0, 0.99); 
     135            // p.update(1.0, forceTotal[0], forceTotal[1], 1.0, 0.99); 
    127136        } 
    128137         
     
    146155//                         forceVario[1] + forceFixed[1]); 
    147156//         // moveParticles(); 
     157    } 
     158 
     159    template<typename COORD_MAP> 
     160    void update(const COORD_MAP& hood, const unsigned& nanoStep) 
     161    { 
     162        update(&hood[Coord<2>(0, -1)], &hood[Coord<2>(0, 0)], &hood[Coord<2>(0, 1)], nanoStep); 
    148163    } 
    149164 
  • src/examples/flowingcanvas/canvasinitializer.h

    r158 r162  
    1212    CanvasInitializer() : 
    1313        // SimpleInitializer<CanvasCell>(Coord<2>(240, 135), 100) 
    14         SimpleInitializer<CanvasCell>(Coord<2>(320, 180), 100) 
     14        // SimpleInitializer<CanvasCell>(Coord<2>(320, 180), 100) 
     15        SimpleInitializer<CanvasCell>(Coord<2>(384, 216), 100) 
    1516        // SimpleInitializer<CanvasCell>(Coord<2>(640, 360), 100) 
    1617    {} 
  • src/examples/flowingcanvas/canvaswriter.h

    r159 r162  
    6161    CanvasWriter(QImage **_outputFrame, 
    6262                 MonolithicSimulator<CanvasCell> *_sim) : 
    63         Writer("foo", _sim, 1), 
     63        Writer<CanvasCell>("foo", _sim, 1), 
    6464        outputFrame(_outputFrame), 
    65         mode(5) 
     65        mode(4) 
    6666    {} 
    6767 
  • src/examples/flowingcanvas/interactivesimulatorgpu.h

    r159 r162  
    55#include <libgeodecomp/misc/grid.h> 
    66 
     7__global__ void updateKernel(CanvasCell *curGrid, CanvasCell *newGrid, unsigned width, unsigned nanoStep) 
     8{ 
     9    int x = 1 + blockDim.x * blockIdx.x + threadIdx.x; 
     10    int y = 1 + blockDim.y * blockIdx.y + threadIdx.y; 
     11    int index = y * width + x; 
     12 
     13    newGrid[index].update( 
     14        curGrid + index - width,  
     15        curGrid + index, 
     16        curGrid + index + width, 
     17        nanoStep); 
     18} 
     19 
     20__global__ void loadGridFromTransferBuffer(CanvasCell *grid, CanvasCell *buffer, unsigned widthGrid, unsigned widthBuffer) 
     21{ 
     22    int x = blockDim.x * blockIdx.x + threadIdx.x; 
     23    int y = blockDim.y * blockIdx.y + threadIdx.y; 
     24    int indexGrid   = (y + 1) * widthGrid + x + 1; 
     25    int indexBuffer = y * widthBuffer + x; 
     26 
     27    grid[indexGrid] = buffer[indexBuffer]; 
     28} 
     29 
     30__global__ void storeGridToTransferBuffer(CanvasCell *grid, CanvasCell *buffer, unsigned widthGrid, unsigned widthBuffer) 
     31{ 
     32    int x = blockDim.x * blockIdx.x + threadIdx.x; 
     33    int y = blockDim.y * blockIdx.y + threadIdx.y; 
     34    int indexGrid   = (y + 1) * widthGrid + x + 1; 
     35    int indexBuffer = y * widthBuffer + x; 
     36 
     37    buffer[indexBuffer] = grid[indexGrid]; 
     38} 
     39 
    740namespace LibGeoDecomp { 
    841 
     42// fixme: this is hardcoded to 2d ATM 
    943template<typename CELL_TYPE> 
    1044class GPUSimulator : public MonolithicSimulator<CELL_TYPE> 
     
    1347    typedef typename CELL_TYPE::Topology Topology; 
    1448    typedef Grid<CELL_TYPE, Topology> GridType; 
    15     static const int DIMENSIONS = Topology::DIMENSIONS; 
    16  
    17     GPUSimulator(Initializer<CELL_TYPE> *initializer) : 
     49    static const int DIM = Topology::DIMENSIONS; 
     50 
     51    GPUSimulator(Initializer<CELL_TYPE> *initializer, const int& device = 0) : 
    1852        MonolithicSimulator<CELL_TYPE>(initializer) 
    1953    { 
    20         gridHost.resize(this->initializer->gridBox().dimensions); 
     54        Coord<DIM> dim = this->initializer->gridDimensions(); 
     55        gridHost.resize(dim); 
    2156        this->initializer->grid(&gridHost); 
     57 
     58        cudaSetDevice(device); 
     59        int byteSize = dim.prod() * sizeof(CELL_TYPE); 
     60        cudaMalloc(&transferGrid, byteSize); 
     61        cudaMemcpy(transferGrid, gridHost.baseAddress(), byteSize, cudaMemcpyHostToDevice); 
     62 
     63        // pad actual grids to avoid edge cell handling 
     64        Coord<2> paddedDim = dim + Coord<2>(2, 2); 
     65        byteSize = paddedDim.prod() * sizeof(CELL_TYPE); 
     66        cudaMalloc(&curGridDevice, byteSize); 
     67        cudaMalloc(&newGridDevice, byteSize); 
     68        GridType initGrid(paddedDim, gridHost.getEdgeCell(), gridHost.getEdgeCell()); 
     69        cudaMemcpy(curGridDevice, initGrid.baseAddress(), byteSize, cudaMemcpyHostToDevice); 
     70        cudaMemcpy(newGridDevice, initGrid.baseAddress(), byteSize, cudaMemcpyHostToDevice); 
     71 
     72        dim3 blockDim; 
     73        dim3 gridDim; 
     74        genKernelDimensions(&blockDim, &gridDim); 
     75        loadGridFromTransferBuffer<<<gridDim, blockDim>>>(curGridDevice, transferGrid, gridWidth() + 2, gridWidth()); 
     76        checkCudaError(); 
     77    } 
     78 
     79    virtual ~GPUSimulator() 
     80    { 
     81        cudaFree(&curGridDevice); 
     82        cudaFree(&newGridDevice); 
    2283    } 
    2384 
     
    40101    virtual const GridType *getGrid() 
    41102    { 
     103        dim3 blockDim; 
     104        dim3 gridDim; 
     105        genKernelDimensions(&blockDim, &gridDim); 
     106        storeGridToTransferBuffer<<<gridDim, blockDim>>>(curGridDevice, transferGrid, gridWidth() + 2, gridWidth()); 
     107        int byteSize = gridHost.getDimensions().prod() * sizeof(CELL_TYPE); 
     108        cudaMemcpy(gridHost.baseAddress(), transferGrid, byteSize, cudaMemcpyDeviceToHost); 
     109        checkCudaError(); 
    42110        return &gridHost; 
    43111    } 
     
    45113private: 
    46114    GridType gridHost; 
     115    CELL_TYPE *transferGrid; 
    47116    CELL_TYPE *curGridDevice; 
    48117    CELL_TYPE *newGridDevice; 
    49118 
     119    void checkCudaError() 
     120    { 
     121        cudaError_t error = cudaGetLastError(); 
     122        if (error != cudaSuccess) { 
     123            const char *errorMessage = cudaGetErrorString(error); 
     124            std::cerr << "CUDA: " << errorMessage << "\n"; 
     125            throw std::runtime_error("CUDA call failed"); 
     126        } 
     127    } 
     128 
    50129    void nanoStep(const unsigned& nanoStep) 
    51130    { 
    52         std::cout << "fixme InteractiveSimulatorGPU::nanoStep()\n"; 
     131        dim3 blockDim; 
     132        dim3 gridDim; 
     133        genKernelDimensions(&blockDim, &gridDim); 
     134        updateKernel<<<gridDim, blockDim>>>(curGridDevice, newGridDevice, gridWidth() + 2, nanoStep); 
     135        checkCudaError(); 
     136        std::swap(curGridDevice, newGridDevice); 
     137    } 
     138 
     139    void genKernelDimensions(dim3 *blockDim, dim3 *gridDim) 
     140    { 
     141        Coord<DIM> dim = gridHost.getDimensions(); 
     142        int blockDimX = 32; 
     143        int blockDimY = 8; 
     144        *blockDim = dim3(blockDimX, blockDimY); 
     145        *gridDim = dim3(dim.x() / blockDimX, dim.y() / blockDimY); 
     146    } 
     147 
     148    int gridWidth() 
     149    { 
     150        return gridHost.getDimensions().x(); 
    53151    } 
    54152}; 
     
    60158    typedef typename CELL_TYPE::Topology Topology; 
    61159    typedef Grid<CELL_TYPE, Topology> GridType; 
    62     static const int DIMENSIONS = Topology::DIMENSIONS; 
     160    typedef std::vector<boost::shared_ptr<Writer<CELL_TYPE> > > WriterVector; 
    63161 
    64162    InteractiveSimulatorGPU(QObject *parent, Initializer<CELL_TYPE> *initializer) : 
     
    77175    virtual void renderOutput() 
    78176    { 
    79         std::cout << "fixme InteractiveSimulatorGPU::renderOutput()\n"; 
     177        // fixme: this is the same for InteractiveSimulatorCPU. refactor? 
     178        for(unsigned i = 0; i < this->writers.size(); i++)  
     179            this->writers[i]->stepFinished(); 
    80180    } 
    81181 
    82182    virtual void update() 
    83183    { 
     184        // for (int fixme = 0; fixme < 500; ++fixme)  
    84185        GPUSimulator<CELL_TYPE>::step(); 
    85     } 
     186        // sleep(1); 
     187    } 
     188 
     189    virtual void registerWriter(Writer<CELL_TYPE> *writer) 
     190    { 
     191        writers.push_back(boost::shared_ptr<Writer<CELL_TYPE> >(writer)); 
     192    } 
     193 
     194protected: 
     195    WriterVector writers; 
    86196}; 
    87197 
  • src/examples/flowingcanvas/main.cu

    r159 r162  
    2727    flow.resize(1200, 900); 
    2828 
    29     InteractiveSimulatorGPU<CanvasCell> *sim = new InteractiveSimulatorGPU<CanvasCell>( 
     29    // InteractiveSimulatorGPU<CanvasCell> *sim = new InteractiveSimulatorGPU<CanvasCell>( 
     30    InteractiveSimulatorCPU<CanvasCell> *sim = new InteractiveSimulatorCPU<CanvasCell>( 
    3031        &flow, 
    3132        new CanvasInitializer());