Logo Search packages:      
Sourcecode: xserver-xorg-video-ati version File versions  Download package

r6xx_accel.c

/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors: Alex Deucher <alexander.deucher@amd.com>
 *          Matthias Hopf <mhopf@suse.de>
 */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "xf86.h"

#include <errno.h>

#include "radeon.h"
#include "r600_shader.h"
#include "radeon_reg.h"
#include "r600_reg.h"
#include "r600_state.h"

#include "radeon_drm.h"
#include "radeon_vbo.h"

/* we try and batch operations together under KMS -
   but it doesn't work yet without misrendering */
#define KMS_MULTI_OP 1

/* Flush the indirect buffer to the kernel for submission to the card */
void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib)
{
    RADEONInfoPtr  info = RADEONPTR(pScrn);
    drmBufPtr          buffer = ib;
    int                start  = 0;
    drm_radeon_indirect_t  indirect;

#if defined(XF86DRM_MODE)
    if (info->cs) {
      radeon_cs_flush_indirect(pScrn);
      return;
    }
#endif

    if (!buffer) return;

    //xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n",
    //       buffer->idx);

    while (buffer->used & 0x3c){
      BEGIN_BATCH(1);
        E32(buffer, CP_PACKET2()); /* fill up to multiple of 16 dwords */
      END_BATCH();
    }

    //ErrorF("buffer bytes: %d\n", buffer->used);

    indirect.idx     = buffer->idx;
    indirect.start   = start;
    indirect.end     = buffer->used;
    indirect.discard = 1;

    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
                  &indirect, sizeof(drm_radeon_indirect_t));

}

void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib)
{
#if defined(XF86DRM_MODE)
    int ret;
    RADEONInfoPtr info = RADEONPTR(pScrn);
    if (info->cs) {
      if (info->accel_state->ib_reset_op) {
          /* if we have data just reset the CS and ignore the operation */
          info->cs->cdw = info->accel_state->ib_reset_op;
          info->accel_state->ib_reset_op = 0;
          return;
      }
      if (info->accel_state->vb_ptr) {
          info->accel_state->vb_ptr = NULL;
      }

      info->accel_state->vb_offset = 0;
      info->accel_state->vb_start_op = -1;

      if (CS_FULL(info->cs)) {
          radeon_cs_flush_indirect(pScrn);
          return;
      }
      radeon_cs_erase(info->cs);
      ret = radeon_cs_space_check(info->cs);
      if (ret)
          ErrorF("space check failed in flush\n");
    }
#endif
    if (!ib) return;

    ib->used = 0;
    R600CPFlushIndirect(pScrn, ib);
}

void
wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    //flush caches, don't generate timestamp
    BEGIN_BATCH(5);
    PACK3(ib, IT_EVENT_WRITE, 1);
    E32(ib, CACHE_FLUSH_AND_INV_EVENT);
    // wait for 3D idle clean
    EREG(ib, WAIT_UNTIL,                          (WAIT_3D_IDLE_bit |
                                       WAIT_3D_IDLECLEAN_bit));
    END_BATCH();
}

void
wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(3);
    EREG(ib, WAIT_UNTIL,                          WAIT_3D_IDLE_bit);
    END_BATCH();
}

void
start_3d(ScrnInfoPtr pScrn, drmBufPtr ib)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    if (info->ChipFamily < CHIP_FAMILY_RV770) {
      BEGIN_BATCH(5);
      PACK3(ib, IT_START_3D_CMDBUF, 1);
      E32(ib, 0);
    } else
      BEGIN_BATCH(3);

    PACK3(ib, IT_CONTEXT_CONTROL, 2);
    E32(ib, 0x80000000);
    E32(ib, 0x80000000);
    END_BATCH();

    wait_3d_idle_clean (pScrn, ib);
}

/*
 * Setup of functional groups
 */

// asic stack/thread/gpr limits - need to query the drm
static void
sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf)
{
    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
    uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
    RADEONInfoPtr info = RADEONPTR(pScrn);

    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
      (info->ChipFamily == CHIP_FAMILY_RV620) ||
      (info->ChipFamily == CHIP_FAMILY_RS780) ||
      (info->ChipFamily == CHIP_FAMILY_RS880) ||
      (info->ChipFamily == CHIP_FAMILY_RV710))
      sq_config = 0;                                  // no VC
    else
      sq_config = VC_ENABLE_bit;

    sq_config |= (DX9_CONSTS_bit |
              ALU_INST_PREFER_VECTOR_bit |
              (sq_conf->ps_prio << PS_PRIO_shift) |
              (sq_conf->vs_prio << VS_PRIO_shift) |
              (sq_conf->gs_prio << GS_PRIO_shift) |
              (sq_conf->es_prio << ES_PRIO_shift));

    sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
                        (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
                        (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
    sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
                        (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));

    sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
                         (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
                         (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
                         (sq_conf->num_es_threads << NUM_ES_THREADS_shift));

    sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
                        (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));

    sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
                        (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));

    BEGIN_BATCH(8);
    PACK0(ib, SQ_CONFIG, 6);
    E32(ib, sq_config);
    E32(ib, sq_gpr_resource_mgmt_1);
    E32(ib, sq_gpr_resource_mgmt_2);
    E32(ib, sq_thread_resource_mgmt);
    E32(ib, sq_stack_resource_mgmt_1);
    E32(ib, sq_stack_resource_mgmt_2);
    END_BATCH();
}

void
set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf)
{
    uint32_t cb_color_info;
    int pitch, slice, h;
    RADEONInfoPtr info = RADEONPTR(pScrn);

    cb_color_info = ((cb_conf->endian      << ENDIAN_shift)                   |
                 (cb_conf->format      << CB_COLOR0_INFO__FORMAT_shift)       |
                 (cb_conf->array_mode  << CB_COLOR0_INFO__ARRAY_MODE_shift)         |
                 (cb_conf->number_type << NUMBER_TYPE_shift)                  |
                 (cb_conf->comp_swap   << COMP_SWAP_shift)                    |
                 (cb_conf->tile_mode   << CB_COLOR0_INFO__TILE_MODE_shift));
    if (cb_conf->read_size)
      cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit;
    if (cb_conf->blend_clamp)
      cb_color_info |= BLEND_CLAMP_bit;
    if (cb_conf->clear_color)
      cb_color_info |= CLEAR_COLOR_bit;
    if (cb_conf->blend_bypass)
      cb_color_info |= BLEND_BYPASS_bit;
    if (cb_conf->blend_float32)
      cb_color_info |= BLEND_FLOAT32_bit;
    if (cb_conf->simple_float)
      cb_color_info |= SIMPLE_FLOAT_bit;
    if (cb_conf->round_mode)
      cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
    if (cb_conf->tile_compact)
      cb_color_info |= TILE_COMPACT_bit;
    if (cb_conf->source_format)
      cb_color_info |= SOURCE_FORMAT_bit;

    pitch = (cb_conf->w / 8) - 1;
    h = (cb_conf->h + 7) & ~7;
    slice = ((cb_conf->w * h) / 64) - 1;

    BEGIN_BATCH(3 + 2);
    EREG(ib, (CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8));
    RELOC_BATCH(cb_conf->bo, RADEON_GEM_DOMAIN_VRAM, 0);
    END_BATCH();

    // rv6xx workaround
    if ((info->ChipFamily > CHIP_FAMILY_R600) &&
        (info->ChipFamily < CHIP_FAMILY_RV770)) {
        BEGIN_BATCH(2);
        PACK3(ib, IT_SURFACE_BASE_UPDATE, 1);
        E32(ib, (2 << cb_conf->id));
        END_BATCH();
    }
    /* Set CMASK & TILE buffer to the offset of color buffer as
     * we don't use those this shouldn't cause any issue and we
     * then have a valid cmd stream
     */
    BEGIN_BATCH(3 + 2);
    EREG(ib, (CB_COLOR0_TILE + (4 * cb_conf->id)), (0     >> 8)); // CMASK per-tile data base/256
    RELOC_BATCH(cb_conf->bo, RADEON_GEM_DOMAIN_VRAM, 0);
    END_BATCH();
    BEGIN_BATCH(3 + 2);
    EREG(ib, (CB_COLOR0_FRAG + (4 * cb_conf->id)), (0     >> 8)); // FMASK per-tile data base/256
    RELOC_BATCH(cb_conf->bo, RADEON_GEM_DOMAIN_VRAM, 0);
    END_BATCH();
    BEGIN_BATCH(12);
    // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib
    EREG(ib, (CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift) |
                                        (slice << SLICE_TILE_MAX_shift)));
    EREG(ib, (CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0    << SLICE_START_shift)           |
                                        (0    << SLICE_MAX_shift)));
    EREG(ib, (CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info);
    EREG(ib, (CB_COLOR0_MASK + (4 * cb_conf->id)), ((0    << CMASK_BLOCK_MAX_shift) |
                                        (0    << FMASK_TILE_MAX_shift)));
    END_BATCH();
}

void
cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr,
                struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t cp_coher_size;
    if (size == 0xffffffff)
      cp_coher_size = 0xffffffff;
    else
      cp_coher_size = ((size + 255) >> 8);

    BEGIN_BATCH(5 + 2);
    PACK3(ib, IT_SURFACE_SYNC, 4);
    E32(ib, sync_type);
    E32(ib, cp_coher_size);
    E32(ib, (mc_addr >> 8));
    E32(ib, 10); /* poll interval */
    RELOC_BATCH(bo, rdomains, wdomain);
    END_BATCH();
}

/* inserts a wait for vline in the command stream */
void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix,
                  xf86CrtcPtr crtc, int start, int stop)
{
    RADEONInfoPtr  info = RADEONPTR(pScrn);
    uint32_t offset;

    if (!crtc)
        return;

    if (stop < start)
        return;

    if (!crtc->enabled)
        return;

    if (info->cs) {
        if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
          return;
    } else {
#ifdef USE_EXA
      if (info->useEXA)
          offset = exaGetPixmapOffset(pPix);
      else
#endif
          offset = pPix->devPrivate.ptr - info->FB;

      /* if drawing to front buffer */
      if (offset != 0)
          return;
    }

    start = max(start, 0);
    stop = min(stop, crtc->mode.VDisplay);

    if (start > crtc->mode.VDisplay)
        return;

#if defined(XF86DRM_MODE)
    if (info->cs) {
      drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;

      BEGIN_BATCH(11);
      /* set the VLINE range */
      EREG(ib, AVIVO_D1MODE_VLINE_START_END, /* this is just a marker */
           (start << AVIVO_D1MODE_VLINE_START_SHIFT) |
           (stop << AVIVO_D1MODE_VLINE_END_SHIFT));

      /* tell the CP to poll the VLINE state register */
      PACK3(ib, IT_WAIT_REG_MEM, 6);
      E32(ib, IT_WAIT_REG | IT_WAIT_EQ);
      E32(ib, IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS));
      E32(ib, 0);
      E32(ib, 0);                          // Ref value
      E32(ib, AVIVO_D1MODE_VLINE_STAT);    // Mask
      E32(ib, 10);                         // Wait interval
      /* add crtc reloc */
      PACK3(ib, IT_NOP, 1);
      E32(ib, drmmode_crtc->mode_crtc->crtc_id);
      END_BATCH();
    } else
#endif
    {
      RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private;

      BEGIN_BATCH(9);
      /* set the VLINE range */
      EREG(ib, AVIVO_D1MODE_VLINE_START_END + radeon_crtc->crtc_offset,
           (start << AVIVO_D1MODE_VLINE_START_SHIFT) |
           (stop << AVIVO_D1MODE_VLINE_END_SHIFT));

      /* tell the CP to poll the VLINE state register */
      PACK3(ib, IT_WAIT_REG_MEM, 6);
      E32(ib, IT_WAIT_REG | IT_WAIT_EQ);
      E32(ib, IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS + radeon_crtc->crtc_offset));
      E32(ib, 0);
      E32(ib, 0);                          // Ref value
      E32(ib, AVIVO_D1MODE_VLINE_STAT);    // Mask
      E32(ib, 10);                         // Wait interval
      END_BATCH();
    }
}

void
fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t sq_pgm_resources;

    sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
                  (fs_conf->stack_size << STACK_SIZE_shift));

    if (fs_conf->dx10_clamp)
      sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit;

    BEGIN_BATCH(3 + 2);
    EREG(ib, SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
    RELOC_BATCH(fs_conf->bo, RADEON_GEM_DOMAIN_VRAM, 0);
    END_BATCH();

    BEGIN_BATCH(6);
    EREG(ib, SQ_PGM_RESOURCES_FS, sq_pgm_resources);
    EREG(ib, SQ_PGM_CF_OFFSET_FS, 0);
    END_BATCH();
}

void
vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t sq_pgm_resources;

    sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
                  (vs_conf->stack_size << STACK_SIZE_shift));

    if (vs_conf->dx10_clamp)
      sq_pgm_resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit;
    if (vs_conf->fetch_cache_lines)
      sq_pgm_resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
    if (vs_conf->uncached_first_inst)
      sq_pgm_resources |= UNCACHED_FIRST_INST_bit;

    BEGIN_BATCH(3 + 2);
    EREG(ib, SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
    RELOC_BATCH(vs_conf->bo, RADEON_GEM_DOMAIN_VRAM, 0);
    END_BATCH();

    BEGIN_BATCH(6);
    EREG(ib, SQ_PGM_RESOURCES_VS, sq_pgm_resources);
    EREG(ib, SQ_PGM_CF_OFFSET_VS, 0);
    END_BATCH();
}

void
ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t sq_pgm_resources;

    sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
                  (ps_conf->stack_size << STACK_SIZE_shift));

    if (ps_conf->dx10_clamp)
      sq_pgm_resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit;
    if (ps_conf->fetch_cache_lines)
      sq_pgm_resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
    if (ps_conf->uncached_first_inst)
      sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
    if (ps_conf->clamp_consts)
      sq_pgm_resources |= CLAMP_CONSTS_bit;

    BEGIN_BATCH(3 + 2);
    EREG(ib, SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
    RELOC_BATCH(ps_conf->bo, RADEON_GEM_DOMAIN_VRAM, 0);
    END_BATCH();

    BEGIN_BATCH(9);
    EREG(ib, SQ_PGM_RESOURCES_PS, sq_pgm_resources);
    EREG(ib, SQ_PGM_EXPORTS_PS, ps_conf->export_mode);
    EREG(ib, SQ_PGM_CF_OFFSET_PS, 0);
    END_BATCH();
}

void
set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    int i;
    const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2);

    BEGIN_BATCH(2 + countreg);
    PACK0(ib, SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg);
    for (i = 0; i < countreg; i++)
      EFLOAT(ib, const_buf[i]);
    END_BATCH();
}

void
set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    /* bool register order is: ps, vs, gs; one register each
     * 1 bits per bool; 32 bools each for ps, vs, gs.
     */
    BEGIN_BATCH(3);
    EREG(ib, SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
    END_BATCH();
}

void
set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t sq_vtx_constant_word2;

    sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
                       ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
                       (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
                       (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
                       (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
    if (res->clamp_x)
          sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;

    if (res->format_comp_all)
          sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;

    if (res->srf_mode_all)
          sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;

    BEGIN_BATCH(9 + 2);
    PACK0(ib, SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7);
    E32(ib, res->vb_addr & 0xffffffff);                     // 0: BASE_ADDRESS
    E32(ib, (res->vtx_num_entries << 2) - 1);               // 1: SIZE
    E32(ib, sq_vtx_constant_word2); // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
    E32(ib, res->mem_req_size << MEM_REQUEST_SIZE_shift);         // 3: MEM_REQUEST_SIZE ?!?
    E32(ib, 0);                                       // 4: n/a
    E32(ib, 0);                                       // 5: n/a
    E32(ib, SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift);  // 6: TYPE
    RELOC_BATCH(res->bo, RADEON_GEM_DOMAIN_GTT, 0);
    END_BATCH();
}

void
set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
    uint32_t sq_tex_resource_word5, sq_tex_resource_word6;

    sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) |
                       (tex_res->tile_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift));

    if (tex_res->w)
      sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) |
                          ((tex_res->w - 1) << TEX_WIDTH_shift));

    if (tex_res->tile_type)
      sq_tex_resource_word0 |= TILE_TYPE_bit;

    sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift);

    if (tex_res->h)
      sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
    if (tex_res->depth)
      sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);

    sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
                       (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
                       (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
                       (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
                       (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
                       (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
                       (tex_res->request_size << REQUEST_SIZE_shift) |
                       (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
                       (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
                       (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
                       (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
                       (tex_res->base_level << BASE_LEVEL_shift));

    if (tex_res->srf_mode_all)
      sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
    if (tex_res->force_degamma)
      sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;

    sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
                       (tex_res->base_array << BASE_ARRAY_shift) |
                       (tex_res->last_array << LAST_ARRAY_shift));

    sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) |
                       (tex_res->perf_modulation << PERF_MODULATION_shift) |
                       (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift));

    if (tex_res->interlaced)
      sq_tex_resource_word6 |= INTERLACED_bit;

    BEGIN_BATCH(9 + 4);
    PACK0(ib, SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7);
    E32(ib, sq_tex_resource_word0);
    E32(ib, sq_tex_resource_word1);
    E32(ib, ((tex_res->base) >> 8));
    E32(ib, ((tex_res->mip_base) >> 8));
    E32(ib, sq_tex_resource_word4);
    E32(ib, sq_tex_resource_word5);
    E32(ib, sq_tex_resource_word6);
    RELOC_BATCH(tex_res->bo, RADEON_GEM_DOMAIN_VRAM, 0);
    RELOC_BATCH(tex_res->mip_bo, RADEON_GEM_DOMAIN_VRAM, 0);
    END_BATCH();
}

void
set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;

    sq_tex_sampler_word0 = ((s->clamp_x       << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift)         |
                      (s->clamp_y       << CLAMP_Y_shift)                           |
                      (s->clamp_z       << CLAMP_Z_shift)                           |
                      (s->xy_mag_filter << XY_MAG_FILTER_shift)                     |
                      (s->xy_min_filter << XY_MIN_FILTER_shift)                     |
                      (s->z_filter      << Z_FILTER_shift)  |
                      (s->mip_filter    << MIP_FILTER_shift)                        |
                      (s->border_color  << BORDER_COLOR_TYPE_shift)                 |
                      (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift)                  |
                      (s->chroma_key    << CHROMA_KEY_shift));
    if (s->point_sampling_clamp)
      sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit;
    if (s->tex_array_override)
      sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit;
    if (s->lod_uses_minor_axis)
      sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit;

    sq_tex_sampler_word1 = ((s->min_lod       << MIN_LOD_shift)                           |
                      (s->max_lod       << MAX_LOD_shift)                           |
                      (s->lod_bias      << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift));

    sq_tex_sampler_word2 = ((s->lod_bias2     << LOD_BIAS_SEC_shift)    |
                      (s->perf_mip      << PERF_MIP_shift)  |
                      (s->perf_z        << PERF_Z_shift));
    if (s->mc_coord_truncate)
      sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
    if (s->force_degamma)
      sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
    if (s->high_precision_filter)
      sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit;
    if (s->fetch_4)
      sq_tex_sampler_word2 |= FETCH_4_bit;
    if (s->sample_is_pcf)
      sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit;
    if (s->type)
      sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;

    BEGIN_BATCH(5);
    PACK0(ib, SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
    E32(ib, sq_tex_sampler_word0);
    E32(ib, sq_tex_sampler_word1);
    E32(ib, sq_tex_sampler_word2);
    END_BATCH();
}

//XXX deal with clip offsets in clip setup
void
set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(6);
    EREG(ib, PA_SC_SCREEN_SCISSOR_TL,              ((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
                                        (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
    EREG(ib, PA_SC_SCREEN_SCISSOR_BR,              ((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
                                        (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
    END_BATCH();
}

void
set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(6);
    EREG(ib, PA_SC_VPORT_SCISSOR_0_TL +
       id * PA_SC_VPORT_SCISSOR_0_TL_offset, ((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
                                    (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
                                    WINDOW_OFFSET_DISABLE_bit));
    EREG(ib, PA_SC_VPORT_SCISSOR_0_BR +
       id * PA_SC_VPORT_SCISSOR_0_BR_offset, ((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
                                    (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
    END_BATCH();
}

void
set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(6);
    EREG(ib, PA_SC_GENERIC_SCISSOR_TL,            ((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
                                       (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
                                       WINDOW_OFFSET_DISABLE_bit));
    EREG(ib, PA_SC_GENERIC_SCISSOR_BR,            ((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
                                       (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift)));
    END_BATCH();
}

void
set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(6);
    EREG(ib, PA_SC_WINDOW_SCISSOR_TL,             ((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
                                       (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
                                       WINDOW_OFFSET_DISABLE_bit));
    EREG(ib, PA_SC_WINDOW_SCISSOR_BR,             ((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
                                       (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
    END_BATCH();
}

void
set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(6);
    EREG(ib, PA_SC_CLIPRECT_0_TL +
       id * PA_SC_CLIPRECT_0_TL_offset,     ((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
                                     (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
    EREG(ib, PA_SC_CLIPRECT_0_BR +
       id * PA_SC_CLIPRECT_0_BR_offset,     ((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
                                     (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
    END_BATCH();
}

/*
 * Setup of default state
 */

void
set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
{
    tex_resource_t tex_res;
    shader_config_t fs_conf;
    sq_config_t sq_conf;
    int i;
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;

    if (accel_state->XInited3D)
      return;

    memset(&tex_res, 0, sizeof(tex_resource_t));
    memset(&fs_conf, 0, sizeof(shader_config_t));

    accel_state->XInited3D = TRUE;

    start_3d(pScrn, accel_state->ib);

    // ASIC specific setup, see drm
    BEGIN_BATCH(15);
    if (info->ChipFamily < CHIP_FAMILY_RV770) {
      EREG(ib, TA_CNTL_AUX,                     (( 3 << GRADIENT_CREDIT_shift)            |
                                       (28 << TD_FIFO_CREDIT_shift)));
      EREG(ib, VC_ENHANCE,                      0);
      EREG(ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
      EREG(ib, DB_DEBUG,                        0x82000000); /* ? */
      EREG(ib, DB_WATERMARKS,               ((4  << DEPTH_FREE_shift)         |
                                     (16 << DEPTH_FLUSH_shift)          |
                                     (0  << FORCE_SUMMARIZE_shift)            |
                                     (4  << DEPTH_PENDING_FREE_shift)   |
                                     (16 << DEPTH_CACHELINE_FREE_shift) |
                                     0));
    } else {
      EREG(ib, TA_CNTL_AUX,                      (( 2 << GRADIENT_CREDIT_shift)           |
                                        (28 << TD_FIFO_CREDIT_shift)));
      EREG(ib, VC_ENHANCE,                       0);
      EREG(ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit);
      EREG(ib, DB_DEBUG,                         0);
      EREG(ib, DB_WATERMARKS,                    ((4  << DEPTH_FREE_shift)          |
                                        (16 << DEPTH_FLUSH_shift)       |
                                        (0  << FORCE_SUMMARIZE_shift)         |
                                        (4  << DEPTH_PENDING_FREE_shift)      |
                                        (4  << DEPTH_CACHELINE_FREE_shift)    |
                                        0));
    }
    END_BATCH();

    // SQ
    sq_conf.ps_prio = 0;
    sq_conf.vs_prio = 1;
    sq_conf.gs_prio = 2;
    sq_conf.es_prio = 3;
    // need to set stack/thread/gpr limits based on the asic
    // for now just set them low enough so any card will work
    // see r600_cp.c in the drm
    switch (info->ChipFamily) {
    case CHIP_FAMILY_R600:
      sq_conf.num_ps_gprs = 192;
      sq_conf.num_vs_gprs = 56;
      sq_conf.num_temp_gprs = 4;
      sq_conf.num_gs_gprs = 0;
      sq_conf.num_es_gprs = 0;
      sq_conf.num_ps_threads = 136;
      sq_conf.num_vs_threads = 48;
      sq_conf.num_gs_threads = 4;
      sq_conf.num_es_threads = 4;
      sq_conf.num_ps_stack_entries = 128;
      sq_conf.num_vs_stack_entries = 128;
      sq_conf.num_gs_stack_entries = 0;
      sq_conf.num_es_stack_entries = 0;
      break;
    case CHIP_FAMILY_RV630:
    case CHIP_FAMILY_RV635:
      sq_conf.num_ps_gprs = 84;
      sq_conf.num_vs_gprs = 36;
      sq_conf.num_temp_gprs = 4;
      sq_conf.num_gs_gprs = 0;
      sq_conf.num_es_gprs = 0;
      sq_conf.num_ps_threads = 144;
      sq_conf.num_vs_threads = 40;
      sq_conf.num_gs_threads = 4;
      sq_conf.num_es_threads = 4;
      sq_conf.num_ps_stack_entries = 40;
      sq_conf.num_vs_stack_entries = 40;
      sq_conf.num_gs_stack_entries = 32;
      sq_conf.num_es_stack_entries = 16;
      break;
    case CHIP_FAMILY_RV610:
    case CHIP_FAMILY_RV620:
    case CHIP_FAMILY_RS780:
    case CHIP_FAMILY_RS880:
    default:
      sq_conf.num_ps_gprs = 84;
      sq_conf.num_vs_gprs = 36;
      sq_conf.num_temp_gprs = 4;
      sq_conf.num_gs_gprs = 0;
      sq_conf.num_es_gprs = 0;
      sq_conf.num_ps_threads = 136;
      sq_conf.num_vs_threads = 48;
      sq_conf.num_gs_threads = 4;
      sq_conf.num_es_threads = 4;
      sq_conf.num_ps_stack_entries = 40;
      sq_conf.num_vs_stack_entries = 40;
      sq_conf.num_gs_stack_entries = 32;
      sq_conf.num_es_stack_entries = 16;
      break;
    case CHIP_FAMILY_RV670:
      sq_conf.num_ps_gprs = 144;
      sq_conf.num_vs_gprs = 40;
      sq_conf.num_temp_gprs = 4;
      sq_conf.num_gs_gprs = 0;
      sq_conf.num_es_gprs = 0;
      sq_conf.num_ps_threads = 136;
      sq_conf.num_vs_threads = 48;
      sq_conf.num_gs_threads = 4;
      sq_conf.num_es_threads = 4;
      sq_conf.num_ps_stack_entries = 40;
      sq_conf.num_vs_stack_entries = 40;
      sq_conf.num_gs_stack_entries = 32;
      sq_conf.num_es_stack_entries = 16;
      break;
    case CHIP_FAMILY_RV770:
      sq_conf.num_ps_gprs = 192;
      sq_conf.num_vs_gprs = 56;
      sq_conf.num_temp_gprs = 4;
      sq_conf.num_gs_gprs = 0;
      sq_conf.num_es_gprs = 0;
      sq_conf.num_ps_threads = 188;
      sq_conf.num_vs_threads = 60;
      sq_conf.num_gs_threads = 0;
      sq_conf.num_es_threads = 0;
      sq_conf.num_ps_stack_entries = 256;
      sq_conf.num_vs_stack_entries = 256;
      sq_conf.num_gs_stack_entries = 0;
      sq_conf.num_es_stack_entries = 0;
      break;
    case CHIP_FAMILY_RV730:
    case CHIP_FAMILY_RV740:
      sq_conf.num_ps_gprs = 84;
      sq_conf.num_vs_gprs = 36;
      sq_conf.num_temp_gprs = 4;
      sq_conf.num_gs_gprs = 0;
      sq_conf.num_es_gprs = 0;
      sq_conf.num_ps_threads = 188;
      sq_conf.num_vs_threads = 60;
      sq_conf.num_gs_threads = 0;
      sq_conf.num_es_threads = 0;
      sq_conf.num_ps_stack_entries = 128;
      sq_conf.num_vs_stack_entries = 128;
      sq_conf.num_gs_stack_entries = 0;
      sq_conf.num_es_stack_entries = 0;
      break;
    case CHIP_FAMILY_RV710:
      sq_conf.num_ps_gprs = 192;
      sq_conf.num_vs_gprs = 56;
      sq_conf.num_temp_gprs = 4;
      sq_conf.num_gs_gprs = 0;
      sq_conf.num_es_gprs = 0;
      sq_conf.num_ps_threads = 144;
      sq_conf.num_vs_threads = 48;
      sq_conf.num_gs_threads = 0;
      sq_conf.num_es_threads = 0;
      sq_conf.num_ps_stack_entries = 128;
      sq_conf.num_vs_stack_entries = 128;
      sq_conf.num_gs_stack_entries = 0;
      sq_conf.num_es_stack_entries = 0;
      break;
    }

    sq_setup(pScrn, ib, &sq_conf);

    BEGIN_BATCH(59);
    EREG(ib, SQ_VTX_BASE_VTX_LOC,                 0);
    EREG(ib, SQ_VTX_START_INST_LOC,               0);

    PACK0(ib, SQ_ESGS_RING_ITEMSIZE, 9);
    E32(ib, 0);                                       // SQ_ESGS_RING_ITEMSIZE
    E32(ib, 0);                                       // SQ_GSVS_RING_ITEMSIZE
    E32(ib, 0);                                       // SQ_ESTMP_RING_ITEMSIZE
    E32(ib, 0);                                       // SQ_GSTMP_RING_ITEMSIZE
    E32(ib, 0);                                       // SQ_VSTMP_RING_ITEMSIZE
    E32(ib, 0);                                       // SQ_PSTMP_RING_ITEMSIZE
    E32(ib, 0);                                       // SQ_FBUF_RING_ITEMSIZE
    E32(ib, 0);                                       // SQ_REDUC_RING_ITEMSIZE
    E32(ib, 0);                                       // SQ_GS_VERT_ITEMSIZE

    // DB
    EREG(ib, DB_DEPTH_INFO,                       0);
    EREG(ib, DB_STENCIL_CLEAR,                    0);
    EREG(ib, DB_DEPTH_CLEAR,                      0);
    EREG(ib, DB_STENCILREFMASK,                   0);
    EREG(ib, DB_STENCILREFMASK_BF,                0);
    EREG(ib, DB_DEPTH_CONTROL,                    0);
    EREG(ib, DB_RENDER_CONTROL,                   STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit);
    if (info->ChipFamily < CHIP_FAMILY_RV770)
      EREG(ib, DB_RENDER_OVERRIDE,              FORCE_SHADER_Z_ORDER_bit);
    else
      EREG(ib, DB_RENDER_OVERRIDE,              0);
    EREG(ib, DB_ALPHA_TO_MASK,                    ((2 << ALPHA_TO_MASK_OFFSET0_shift)     |
                                       (2 << ALPHA_TO_MASK_OFFSET1_shift)     |
                                       (2 << ALPHA_TO_MASK_OFFSET2_shift)     |
                                       (2 << ALPHA_TO_MASK_OFFSET3_shift)));


    EREG(ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */
                         DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */


    // SX
    EREG(ib, SX_ALPHA_TEST_CONTROL,               0);
    EREG(ib, SX_ALPHA_REF,                        0);

    // CB
    PACK0(ib, CB_BLEND_RED, 4);
    E32(ib, 0x00000000);
    E32(ib, 0x00000000);
    E32(ib, 0x00000000);
    E32(ib, 0x00000000);
    END_BATCH();

    if (info->ChipFamily < CHIP_FAMILY_RV770) {
      BEGIN_BATCH(11);
      PACK0(ib, CB_FOG_RED, 3);
      E32(ib, 0x00000000);
      E32(ib, 0x00000000);
      E32(ib, 0x00000000);
      PACK0(ib, CB_CLEAR_RED, 4);
      EFLOAT(ib, 1.0);                                /* WTF? */
      EFLOAT(ib, 0.0);
      EFLOAT(ib, 1.0);
      EFLOAT(ib, 1.0);
      END_BATCH();
    }

    BEGIN_BATCH(18);
    PACK0(ib, CB_CLRCMP_CONTROL, 4);
    E32(ib, 1 << CLRCMP_FCN_SEL_shift);                     // CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC
    E32(ib, 0);                                       // CB_CLRCMP_SRC
    E32(ib, 0);                                       // CB_CLRCMP_DST
    E32(ib, 0);                                       // CB_CLRCMP_MSK

    EREG(ib, CB_SHADER_MASK,                      (0xf << OUTPUT0_ENABLE_shift));
    EREG(ib, R7xx_CB_SHADER_CONTROL,              (RT0_ENABLE_bit));


    // SC
    EREG(ib, PA_SC_WINDOW_OFFSET,                 ((0 << WINDOW_X_OFFSET_shift) |
                                       (0 << WINDOW_Y_OFFSET_shift)));

    EREG(ib, PA_SC_CLIPRECT_RULE,                 CLIP_RULE_mask);
    END_BATCH();

    /* clip boolean is set to always visible -> doesn't matter */
    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
      set_clip_rect (pScrn, ib, i, 0, 0, 8192, 8192);

    BEGIN_BATCH(3);
    if (info->ChipFamily < CHIP_FAMILY_RV770)
      EREG(ib, R7xx_PA_SC_EDGERULE,             0x00000000);
    else
      EREG(ib, R7xx_PA_SC_EDGERULE,             0xAAAAAAAA);
    END_BATCH();

    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) {
      set_vport_scissor (pScrn, ib, i, 0, 0, 8192, 8192);
      BEGIN_BATCH(4);
      PACK0(ib, PA_SC_VPORT_ZMIN_0 + i * PA_SC_VPORT_ZMIN_0_offset, 2);
      EFLOAT(ib, 0.0);
      EFLOAT(ib, 1.0);
      END_BATCH();
    }

    BEGIN_BATCH(15);
    if (info->ChipFamily < CHIP_FAMILY_RV770)
      EREG(ib, PA_SC_MODE_CNTL,                 (WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit));
    else
      EREG(ib, PA_SC_MODE_CNTL,                 (FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit |
                                       0x00500000)); /* ? */

    EREG(ib, PA_SU_SC_MODE_CNTL, (FACE_bit |
                          (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) |
                          (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));


    EREG(ib, PA_SC_LINE_CNTL,                     0);
    EREG(ib, PA_SC_AA_CONFIG,                     0);
    EREG(ib, PA_SC_AA_MASK,                       0xFFFFFFFF);
    END_BATCH();

    //XXX: double check this
    if (info->ChipFamily > CHIP_FAMILY_R600) {
      BEGIN_BATCH(6);
      EREG(ib, PA_SC_AA_SAMPLE_LOCS_MCTX,       0);
      EREG(ib, PA_SC_AA_SAMPLE_LOCS_8S_WD1_M,   0);
      END_BATCH();
    }

    BEGIN_BATCH(83);
    EREG(ib, PA_SC_LINE_STIPPLE,                  0);
    EREG(ib, PA_SC_MPASS_PS_CNTL,                 0);

    // CL
    PACK0(ib, PA_CL_VPORT_XSCALE_0, 6);
    EFLOAT(ib, 0.0f);                                 // PA_CL_VPORT_XSCALE
    EFLOAT(ib, 0.0f);                                 // PA_CL_VPORT_XOFFSET
    EFLOAT(ib, 0.0f);                                 // PA_CL_VPORT_YSCALE
    EFLOAT(ib, 0.0f);                                 // PA_CL_VPORT_YOFFSET
    EFLOAT(ib, 0.0f);                                 // PA_CL_VPORT_ZSCALE
    EFLOAT(ib, 0.0f);                                 // PA_CL_VPORT_ZOFFSET
    EREG(ib, PA_CL_VTE_CNTL,                      0);
    EREG(ib, PA_CL_VS_OUT_CNTL,                   0);
    EREG(ib, PA_CL_NANINF_CNTL,                   0);
    PACK0(ib, PA_CL_GB_VERT_CLIP_ADJ, 4);
    EFLOAT(ib, 1.0);                                  // PA_CL_GB_VERT_CLIP_ADJ
    EFLOAT(ib, 1.0);                                  // PA_CL_GB_VERT_DISC_ADJ
    EFLOAT(ib, 1.0);                                  // PA_CL_GB_HORZ_CLIP_ADJ
    EFLOAT(ib, 1.0);                                  // PA_CL_GB_HORZ_DISC_ADJ

    /* Scissor / viewport */
    EREG(ib, PA_CL_VTE_CNTL,                      VTX_XY_FMT_bit);
    EREG(ib, PA_CL_CLIP_CNTL,                     CLIP_DISABLE_bit);

    // SU
    EREG(ib, PA_SU_SC_MODE_CNTL,                  FACE_bit);
    EREG(ib, PA_SU_POINT_SIZE,                    0);
    EREG(ib, PA_SU_POINT_MINMAX,                  0);
    EREG(ib, PA_SU_POLY_OFFSET_DB_FMT_CNTL,       0);
    EREG(ib, PA_SU_POLY_OFFSET_BACK_SCALE,        0);
    EREG(ib, PA_SU_POLY_OFFSET_FRONT_SCALE,       0);
    EREG(ib, PA_SU_POLY_OFFSET_BACK_OFFSET,       0);
    EREG(ib, PA_SU_POLY_OFFSET_FRONT_OFFSET,      0);

    EREG(ib, PA_SU_LINE_CNTL,                     (8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */
    EREG(ib, PA_SU_VTX_CNTL,                      ((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | PIX_CENTER_bit |
                                       (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */
    EREG(ib, PA_SU_POLY_OFFSET_CLAMP,             0);

    // SPI
    if (info->ChipFamily < CHIP_FAMILY_RV770)
      EREG(ib, R7xx_SPI_THREAD_GROUPING,        0);
    else
      EREG(ib, R7xx_SPI_THREAD_GROUPING,        (1 << PS_GROUPING_shift));

    EREG(ib, SPI_INPUT_Z,                         0);
    EREG(ib, SPI_FOG_CNTL,                        0);
    EREG(ib, SPI_FOG_FUNC_SCALE,                  0);
    EREG(ib, SPI_FOG_FUNC_BIAS,                   0);
    END_BATCH();

    // clear FS
    fs_conf.bo = accel_state->shaders_bo;
    fs_setup(pScrn, ib, &fs_conf);

    // VGT
    BEGIN_BATCH(75);
    EREG(ib, VGT_MAX_VTX_INDX,                    2048); /* XXX set to a reasonably large number of indices */
    EREG(ib, VGT_MIN_VTX_INDX,                    0);
    EREG(ib, VGT_INDX_OFFSET,                     0);
    EREG(ib, VGT_INSTANCE_STEP_RATE_0,            0);
    EREG(ib, VGT_INSTANCE_STEP_RATE_1,            0);
    EREG(ib, VGT_MULTI_PRIM_IB_RESET_INDX,        0);
    EREG(ib, VGT_OUTPUT_PATH_CNTL,                0);
    EREG(ib, VGT_GS_MODE,                         0);
    EREG(ib, VGT_HOS_CNTL,                        0);
    EREG(ib, VGT_HOS_MAX_TESS_LEVEL,              0);
    EREG(ib, VGT_HOS_MIN_TESS_LEVEL,              0);
    EREG(ib, VGT_HOS_REUSE_DEPTH,                 0);
    EREG(ib, VGT_GROUP_PRIM_TYPE,                 0);
    EREG(ib, VGT_GROUP_FIRST_DECR,                0);
    EREG(ib, VGT_GROUP_DECR,                      0);
    EREG(ib, VGT_GROUP_VECT_0_CNTL,               0);
    EREG(ib, VGT_GROUP_VECT_1_CNTL,               0);
    EREG(ib, VGT_GROUP_VECT_0_FMT_CNTL,           0);
    EREG(ib, VGT_GROUP_VECT_1_FMT_CNTL,           0);
    EREG(ib, VGT_PRIMITIVEID_EN,                  0);
    EREG(ib, VGT_MULTI_PRIM_IB_RESET_EN,          0);
    EREG(ib, VGT_STRMOUT_EN,                      0);
    EREG(ib, VGT_REUSE_OFF,                       0);
    EREG(ib, VGT_VTX_CNT_EN,                      0);
    EREG(ib, VGT_STRMOUT_BUFFER_EN,               0);
    END_BATCH();
}


/*
 * Commands
 */

void
draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t i, count;

    // calculate num of packets
    count = 2;
    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT)
      count += (draw_conf->num_indices + 1) / 2;
    else
      count += draw_conf->num_indices;

    BEGIN_BATCH(8 + count);
    EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
    PACK3(ib, IT_INDEX_TYPE, 1);
    E32(ib, draw_conf->index_type);
    PACK3(ib, IT_NUM_INSTANCES, 1);
    E32(ib, draw_conf->num_instances);

    PACK3(ib, IT_DRAW_INDEX_IMMD, count);
    E32(ib, draw_conf->num_indices);
    E32(ib, draw_conf->vgt_draw_initiator);

    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) {
      for (i = 0; i < draw_conf->num_indices; i += 2) {
          if ((i + 1) == draw_conf->num_indices)
            E32(ib, indices[i]);
          else
            E32(ib, (indices[i] | (indices[i + 1] << 16)));
      }
    } else {
      for (i = 0; i < draw_conf->num_indices; i++)
          E32(ib, indices[i]);
    }
    END_BATCH();
}

void
draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(10);
    EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
    PACK3(ib, IT_INDEX_TYPE, 1);
    E32(ib, draw_conf->index_type);
    PACK3(ib, IT_NUM_INSTANCES, 1);
    E32(ib, draw_conf->num_instances);
    PACK3(ib, IT_DRAW_INDEX_AUTO, 2);
    E32(ib, draw_conf->num_indices);
    E32(ib, draw_conf->vgt_draw_initiator);
    END_BATCH();
}

Bool
r600_vb_get(ScrnInfoPtr pScrn)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;

    accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart +
      (accel_state->ib->idx*accel_state->ib->total)+
      (accel_state->ib->total / 2);
    accel_state->vb_total = (accel_state->ib->total / 2);
    accel_state->vb_ptr = (pointer)((char*)accel_state->ib->address +
                            (accel_state->ib->total / 2));
    accel_state->vb_offset = 0;
    return TRUE;
}

void
r600_vb_discard(ScrnInfoPtr pScrn)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    info->accel_state->vb_start_op = -1;
}



int
r600_cp_start(ScrnInfoPtr pScrn)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;

#if defined(XF86DRM_MODE)
    if (info->cs) {
      if (CS_FULL(info->cs)) {
          radeon_cs_flush_indirect(pScrn);
      }
      accel_state->ib_reset_op = info->cs->cdw;
      accel_state->vb_start_op = accel_state->vb_offset;
    } else
#endif
    {
      accel_state->ib = RADEONCPGetBuffer(pScrn);
      if (!r600_vb_get(pScrn)) {
          return -1;
      }
      accel_state->vb_start_op = accel_state->vb_offset;
    }
    return 0;
}

void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;
    draw_config_t   draw_conf;
    vtx_resource_t  vtx_res;

    if (accel_state->vb_start_op == -1)
      return;

    CLEAR (draw_conf);
    CLEAR (vtx_res);

    if (accel_state->vb_offset == accel_state->vb_start_op) {
        R600IBDiscard(pScrn, accel_state->ib);
      r600_vb_discard(pScrn);
      return;
    }

    /* flush vertex cache */
    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
      (info->ChipFamily == CHIP_FAMILY_RV620) ||
      (info->ChipFamily == CHIP_FAMILY_RS780) ||
      (info->ChipFamily == CHIP_FAMILY_RS880) ||
      (info->ChipFamily == CHIP_FAMILY_RV710))
      cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
                      accel_state->vb_offset, accel_state->vb_mc_addr,
                      accel_state->vb_bo,
                      RADEON_GEM_DOMAIN_GTT, 0);
    else
      cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit,
                      accel_state->vb_offset, accel_state->vb_mc_addr,
                      accel_state->vb_bo,
                      RADEON_GEM_DOMAIN_GTT, 0);

    /* Vertex buffer setup */
    accel_state->vb_size = accel_state->vb_offset - accel_state->vb_start_op;
    vtx_res.id              = SQ_VTX_RESOURCE_vs;
    vtx_res.vtx_size_dw     = vtx_size / 4;
    vtx_res.vtx_num_entries = accel_state->vb_size / 4;
    vtx_res.mem_req_size    = 1;
    vtx_res.vb_addr         = accel_state->vb_mc_addr + accel_state->vb_start_op;
    vtx_res.bo              = accel_state->vb_bo;
    set_vtx_resource        (pScrn, accel_state->ib, &vtx_res);

    /* Draw */
    draw_conf.prim_type          = DI_PT_RECTLIST;
    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
    draw_conf.num_instances      = 1;
    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;

    draw_auto(pScrn, accel_state->ib, &draw_conf);

    wait_3d_idle_clean(pScrn, accel_state->ib);

    /* sync dst surface */
    cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
                  accel_state->dst_size, accel_state->dst_mc_addr,
                  accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0);

    wait_3d_idle_clean(pScrn, accel_state->ib);

    accel_state->vb_start_op = -1;
    accel_state->ib_reset_op = 0;

#if KMS_MULTI_OP
    if (!info->cs)
#endif
      R600CPFlushIndirect(pScrn, accel_state->ib);
}

void r600_vb_no_space(ScrnInfoPtr pScrn, int vert_size)
{
#ifdef XF86DRM_MODE
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state; 

    if (info->cs) {
      if (accel_state->vb_bo) {
          if (accel_state->vb_start_op != accel_state->vb_offset) { 
            r600_finish_op(pScrn, vert_size);
            accel_state->ib_reset_op = info->cs->cdw;
          }
          
          /* release the current VBO */
          radeon_vbo_put(pScrn);
      }
      
      /* get a new one */
      radeon_vbo_get(pScrn);
      return;
    }
#endif 

    r600_finish_op(pScrn, vert_size);
    r600_cp_start(pScrn);
}

Generated by  Doxygen 1.6.0   Back to index