PAPI  5.1.0.2
perfctr-x86.c File Reference
Include dependency graph for perfctr-x86.c:

Go to the source code of this file.

Defines

#define P4_VEC   "SSE"
#define P4_FPU   " X87 SSE_DP"
#define AMD_FPU   "SPECULATIVE"
#define P4_REPLAY_REAL_MASK   0x00000003

Functions

int _perfctr_init_component (int)
int _perfctr_ctl (hwd_context_t *ctx, int code, _papi_int_option_t *option)
void _perfctr_dispatch_timer (int signal, hwd_siginfo_t *si, void *context)
int _perfctr_init_thread (hwd_context_t *ctx)
int _perfctr_shutdown_thread (hwd_context_t *ctx)
static int is_pentium4 (void)
static int _papi_hwd_fixup_vec (int cidx)
static int _papi_p4_hwd_fixup_fp (int cidx)
static int _papi_hwd_fixup_fp (char *name, int cidx)
static void print_alloc (X86_reg_alloc_t *a)
void print_control (const struct perfctr_cpu_control *control)
int setup_x86_presets (int cputype, int cidx)
static int _x86_init_control_state (hwd_control_state_t *ptr)
int _x86_set_domain (hwd_control_state_t *cntrl, int domain)
static int _bpt_map_avail (hwd_reg_alloc_t *dst, int ctr)
static void _bpt_map_set (hwd_reg_alloc_t *dst, int ctr)
static int _bpt_map_exclusive (hwd_reg_alloc_t *dst)
static int _bpt_map_shared (hwd_reg_alloc_t *dst, hwd_reg_alloc_t *src)
static void _bpt_map_preempt (hwd_reg_alloc_t *dst, hwd_reg_alloc_t *src)
static void _bpt_map_update (hwd_reg_alloc_t *dst, hwd_reg_alloc_t *src)
static int _x86_allocate_registers (EventSetInfo_t *ESI)
static void clear_cs_events (hwd_control_state_t *this_state)
static int _x86_update_control_state (hwd_control_state_t *this_state, NativeInfo_t *native, int count, hwd_context_t *ctx)
static int _x86_start (hwd_context_t *ctx, hwd_control_state_t *state)
static int _x86_stop (hwd_context_t *ctx, hwd_control_state_t *state)
static int _x86_read (hwd_context_t *ctx, hwd_control_state_t *spc, long long **dp, int flags)
static int _x86_reset (hwd_context_t *ctx, hwd_control_state_t *cntrl)
static void swap_events (EventSetInfo_t *ESI, struct hwd_pmc_control *contr, int cntr1, int cntr2)
static int _x86_set_overflow (EventSetInfo_t *ESI, int EventIndex, int threshold)
static int _x86_stop_profiling (ThreadInfo_t *master, EventSetInfo_t *ESI)
static int _pfm_get_counter_info (unsigned int event, unsigned int *selector, int *code)
int _papi_libpfm_ntv_code_to_bits_perfctr (unsigned int EventCode, hwd_register_t *newbits)

Variables

papi_mdi_t _papi_hwi_system_info
papi_vector_t _perfctr_vector
pentium4_escr_reg_t pentium4_escrs []
pentium4_cccr_reg_t pentium4_cccrs []
pentium4_event_t pentium4_events []
static pentium4_replay_regs_t p4_replay_regs []
static int pfm2intel []

Define Documentation

#define AMD_FPU   "SPECULATIVE"

Definition at line 72 of file perfctr-x86.c.

#define P4_FPU   " X87 SSE_DP"

Definition at line 61 of file perfctr-x86.c.

#define P4_REPLAY_REAL_MASK   0x00000003

Definition at line 1093 of file perfctr-x86.c.

#define P4_VEC   "SSE"

Definition at line 51 of file perfctr-x86.c.


Function Documentation

static int _bpt_map_avail ( hwd_reg_alloc_t *  dst,
int  ctr 
) [static]

Definition at line 456 of file perfctr-x86.c.

{
    /* Nonzero when bit `ctr` is set in dst's counter-selector bitmask,
       zero otherwise. */
    const int ctr_bit = 1 << ctr;

    return ( int ) ( dst->ra_selector & ctr_bit );
}
static int _bpt_map_exclusive ( hwd_reg_alloc_t *  dst) [static]

Definition at line 485 of file perfctr-x86.c.

{
    /* Returns 1 when dst's rank is exactly one, 0 otherwise. */
    if ( dst->ra_rank == 1 )
        return 1;

    return 0;
}
static void _bpt_map_preempt ( hwd_reg_alloc_t *  dst,
hwd_reg_alloc_t *  src 
) [static]

Definition at line 533 of file perfctr-x86.c.

{
    /* Strips from dst every counter that src also selects and then
       recounts the counters left in dst's selector (ra_rank). On
       Pentium 4, PEBS settings and shared ESCRs are considered too. */
    int n, pebs_clash;
    unsigned overlap;

    if ( !is_pentium4() ) {
        /* Only the counter masks are compared on non-P4 processors. */
        overlap = dst->ra_selector & src->ra_selector;
        if ( overlap )
            dst->ra_selector ^= overlap;

        dst->ra_rank = 0;
        for ( n = 0; n < MAX_COUNTERS; n++ ) {
            if ( dst->ra_selector & ( 1 << n ) )
                dst->ra_rank++;
        }
        return;
    }

#ifdef DEBUG
    SUBDBG( "src, dst\n" );
    print_alloc( src );
    print_alloc( dst );
#endif

    /* A pebs conflict means: both sides have a non-zero value for the
       same pebs field and the two values differ. */
    pebs_clash = 0;
    if ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable &&
         ( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) ) {
        pebs_clash = 1;
    } else if ( dst->ra_bits.pebs_matrix_vert &&
                src->ra_bits.pebs_matrix_vert &&
                ( dst->ra_bits.pebs_matrix_vert !=
                  src->ra_bits.pebs_matrix_vert ) ) {
        pebs_clash = 1;
    }

    if ( pebs_clash ) {
        /* dst cannot coexist with src at all; ra_rank is left as it was */
        SUBDBG( "pebs conflict! clearing selector\n" );
        dst->ra_selector = 0;
        return;
    }

    /* Drop the counters reachable through any escr both events share,
       and mark that escr slot as empty (-1) in dst. */
    for ( n = 0; n < 2; n++ ) {
        if ( ( dst->ra_escr[n] == src->ra_escr[n] ) &&
             ( ( int ) dst->ra_escr[n] != -1 ) ) {
            dst->ra_selector &= ~dst->ra_bits.counter[n];
            dst->ra_escr[n] = -1;
        }
    }

    /* Drop whatever counters are still common to both selectors. */
    overlap = ( dst->ra_selector & src->ra_selector );
    if ( overlap )
        dst->ra_selector ^= overlap;

    /* Recount the bits that remain set in the selector. */
    dst->ra_rank = 0;
    for ( n = 0; n < MAX_COUNTERS; n++ ) {
        if ( dst->ra_selector & ( 1 << n ) )
            dst->ra_rank++;
    }

#ifdef DEBUG
    SUBDBG( "new dst\n" );
    print_alloc( dst );
#endif
}

Here is the call graph for this function:

static void _bpt_map_set ( hwd_reg_alloc_t *  dst,
int  ctr 
) [static]

Definition at line 465 of file perfctr-x86.c.

{
    /* Commits dst to the single counter `ctr`: the selector keeps only
       that bit and the rank becomes 1. */
    dst->ra_selector = ( unsigned int ) ( 1 << ctr );
    dst->ra_rank = 1;

    if ( !is_pentium4() )
        return;

    /* On Pentium 4 an escr goes together with the counter. Whichever of
       the two counter masks holds the chosen counter keeps its escr; the
       other escr slot is set to empty (-1). */
    if ( dst->ra_bits.counter[0] & dst->ra_selector ) {
        dst->ra_escr[1] = -1;
    } else {
        dst->ra_escr[0] = -1;
    }
}

Here is the call graph for this function:

static int _bpt_map_shared ( hwd_reg_alloc_t *  dst,
hwd_reg_alloc_t *  src 
) [static]

Definition at line 495 of file perfctr-x86.c.

{
    /* Reports (nonzero) whether dst and src compete for a resource. */
    int hw_conflict, pebs_conflict;

    if ( !is_pentium4() ) {
        /* Non-P4: a conflict is simply a counter both can use. */
        return ( int ) ( dst->ra_selector & src->ra_selector );
    }

    /* Pentium 4, part 1: the selectors overlap, or one of the two escr
       slots holds the same escr on both sides (and is not empty, -1). */
    hw_conflict = 0;
    if ( dst->ra_selector & src->ra_selector ) {
        hw_conflict = 1;
    } else if ( ( dst->ra_escr[0] == src->ra_escr[0] ) &&
                ( ( int ) dst->ra_escr[0] != -1 ) ) {
        hw_conflict = 1;
    } else if ( ( dst->ra_escr[1] == src->ra_escr[1] ) &&
                ( ( int ) dst->ra_escr[1] != -1 ) ) {
        hw_conflict = 1;
    }

    /* Pentium 4, part 2: a pebs field that is non-zero on both sides
       but carries different values. */
    pebs_conflict = 0;
    if ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable &&
         ( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) ) {
        pebs_conflict = 1;
    } else if ( dst->ra_bits.pebs_matrix_vert &&
                src->ra_bits.pebs_matrix_vert &&
                ( dst->ra_bits.pebs_matrix_vert !=
                  src->ra_bits.pebs_matrix_vert ) ) {
        pebs_conflict = 1;
    }

    if ( pebs_conflict ) {
        SUBDBG( "pebs conflict!\n" );
    }

    return ( hw_conflict | pebs_conflict );
}

Here is the call graph for this function:

static void _bpt_map_update ( hwd_reg_alloc_t *  dst,
hwd_reg_alloc_t *  src 
) [static]

Definition at line 596 of file perfctr-x86.c.

{
    /* Copies src's counter selection into dst. The rank and the
       ra_bits payload of dst are not touched. */
    dst->ra_selector = src->ra_selector;

    if ( !is_pentium4() )
        return;

    /* Pentium 4 also carries the escr choice along with the counter. */
    dst->ra_escr[0] = src->ra_escr[0];
    dst->ra_escr[1] = src->ra_escr[1];
}

Here is the call graph for this function:

static int _papi_hwd_fixup_fp ( char *  name,
int  cidx 
) [static]

Definition at line 131 of file perfctr-x86.c.

{
    /* Builds the preset table name "<name> FPU <variant>" and loads it
     * through _papi_load_preset_table().
     *
     * name - prefix of the table name, supplied by the caller
     * cidx - component index, passed through to the table loader
     *
     * <variant> is the value of the PAPI_OPTERON_FP environment variable,
     * or AMD_FPU when the variable is unset or empty.
     *
     * Returns PAPI_OK on success, PAPI_EINVAL when the table cannot be
     * loaded or the composed name would not fit in the local buffer.
     */
    static const char infix[] = " FPU ";
    char table_name[PAPI_MIN_STR_LEN];
    const char *variant = getenv( "PAPI_OPTERON_FP" );

    /* if the env variable isn't set, fall back to the default */
    if ( ( variant == NULL ) || ( strlen( variant ) == 0 ) ) {
        variant = AMD_FPU;
    }

    /* Both `name` and the environment value are of arbitrary length;
       reject anything that would overrun table_name instead of letting
       strcpy/strcat write past the end of the stack buffer. */
    if ( strlen( name ) + strlen( infix ) + strlen( variant ) >=
         sizeof ( table_name ) ) {
       PAPIERROR( "Improper usage of PAPI_OPTERON_FP environment "
                      "variable.\nUse one of RETIRED, SPECULATIVE, "
              "SSE_SP, SSE_DP" );
       return PAPI_EINVAL;
    }

    strcpy( table_name, name );
    strcat( table_name, infix );
    strcat( table_name, variant );

    if ( ( _papi_load_preset_table( table_name, 0, cidx ) ) != PAPI_OK ) {
       PAPIERROR( "Improper usage of PAPI_OPTERON_FP environment "
                      "variable.\nUse one of RETIRED, SPECULATIVE, "
              "SSE_SP, SSE_DP" );
       return PAPI_EINVAL;
    }
    return PAPI_OK;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int _papi_hwd_fixup_vec ( int  cidx) [static]

Definition at line 86 of file perfctr-x86.c.

{
    /* Builds the preset table name "Intel Pentium4 VEC <variant>" and
     * loads it through _papi_load_preset_table().
     *
     * cidx - component index, passed through to the table loader
     *
     * <variant> is the value of the PAPI_PENTIUM4_VEC environment
     * variable, or P4_VEC when the variable is unset or empty.
     *
     * Returns PAPI_OK on success, PAPI_EINVAL when the table cannot be
     * loaded or the composed name would not fit in the local buffer.
     */
    char table_name[PAPI_MIN_STR_LEN] = "Intel Pentium4 VEC ";
    const char *variant = getenv( "PAPI_PENTIUM4_VEC" );

    /* if the env variable isn't set, use the default */
    if ( ( variant == NULL ) || ( strlen( variant ) == 0 ) ) {
       variant = P4_VEC;
    }

    /* The environment value has arbitrary length; refuse it rather than
       letting strcat run past the end of table_name. */
    if ( strlen( table_name ) + strlen( variant ) >= sizeof ( table_name ) ) {
       PAPIERROR( "Improper usage of PAPI_PENTIUM4_VEC environment "
                      "variable.\nUse either SSE or MMX" );
       return PAPI_EINVAL;
    }
    strcat( table_name, variant );

    if ( ( _papi_load_preset_table( table_name, 0, cidx ) ) != PAPI_OK ) {
       PAPIERROR( "Improper usage of PAPI_PENTIUM4_VEC environment "
                      "variable.\nUse either SSE or MMX" );
       return PAPI_EINVAL;
    }
    return PAPI_OK;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int _papi_libpfm_ntv_code_to_bits_perfctr ( unsigned int  EventCode,
hwd_register_t *  newbits 
)

Definition at line 1205 of file perfctr-x86.c.

{
    /* Translates a native event code into the register settings stored
     * in *newbits (viewed here as an X86_register_t).
     *
     * EventCode - native event code; split into an event index and a
     *             unit mask by _pfm_decode_native_event()
     * newbits   - out: register description filled in by this function
     *
     * Returns PAPI_OK on success, PAPI_ENOEVNT when the event code cannot
     * be decoded, or (non-P4 path) the error from _pfm_get_counter_info().
     */
    unsigned int event, umask;

    X86_register_t *bits = (X86_register_t *)newbits;

    if ( is_pentium4() ) {
       pentium4_escr_value_t escr_value;
       pentium4_cccr_value_t cccr_value;
       unsigned int num_masks, replay_mask, unit_masks[12];
       unsigned int event_mask;
       unsigned int tag_value, tag_enable;
       unsigned int i;
       int j, escr, cccr, pmd;

       if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK )
      return PAPI_ENOEVNT;

       /* for each allowed escr (1 or 2) find the allowed cccrs.
      for each allowed cccr find the pmd index
      convert to an intel counter number; or it into bits->counter */
       for ( i = 0; i < MAX_ESCRS_PER_EVENT; i++ ) {
      bits->counter[i] = 0;
      escr = pentium4_events[event].allowed_escrs[i];
      /* a negative entry means this escr slot is unused for the event */
      if ( escr < 0 ) {
         continue;
      }

      bits->escr[i] = escr;

      for ( j = 0; j < MAX_CCCRS_PER_ESCR; j++ ) {
         cccr = pentium4_escrs[escr].allowed_cccrs[j];
         if ( cccr < 0 ) {
        continue;
         }

         pmd = pentium4_cccrs[cccr].pmd;
         /* pfm2intel[] maps the pmd index to the bit position used
            in the counter mask */
         bits->counter[i] |= ( 1 << pfm2intel[pmd] );
      }
       }

       /* if there's only one valid escr, copy the values */
       /* NOTE: `escr` here is whatever the LAST loop iteration read, so
          this tests the final allowed_escrs[] slot only. */
       if ( escr < 0 ) {
      bits->escr[1] = bits->escr[0];
      bits->counter[1] = bits->counter[0];
       }

       /* Calculate the event-mask value. Invalid masks
    * specified by the caller are ignored. */
       tag_value = 0;
       tag_enable = 0;
       event_mask = _pfm_convert_umask( event, umask );

       /* bits 16-19 of the converted mask, when set, carry the tag value */
       if ( event_mask & 0xF0000 ) {
      tag_enable = 1;
      tag_value = ( ( event_mask & 0xF0000 ) >> EVENT_MASK_BITS );
       }

       event_mask &= 0x0FFFF;   /* mask off possible tag bits */

       /* Set up the ESCR and CCCR register values. */
       escr_value.val = 0;
       escr_value.bits.t1_usr = 0;  /* controlled by kernel */
       escr_value.bits.t1_os = 0;   /* controlled by kernel */
//    escr_value.bits.t0_usr       = (plm & PFM_PLM3) ? 1 : 0;
//    escr_value.bits.t0_os        = (plm & PFM_PLM0) ? 1 : 0;
       escr_value.bits.tag_enable = tag_enable;
       escr_value.bits.tag_value = tag_value;
       escr_value.bits.event_mask = event_mask;
       escr_value.bits.event_select = pentium4_events[event].event_select;
       escr_value.bits.reserved = 0;

       /* initialize the proper bits in the cccr register */
       cccr_value.val = 0;
       cccr_value.bits.reserved1 = 0;
       cccr_value.bits.enable = 1;
       cccr_value.bits.escr_select = pentium4_events[event].escr_select;
       cccr_value.bits.active_thread = 3;   
       /* FIXME: This is set to count when either logical
    *        CPU is active. Need a way to distinguish
    *        between logical CPUs when HT is enabled.
        *        the docs say these bits should always 
    *        be set.                                  */
       cccr_value.bits.compare = 0; 
       /* FIXME: What do we do with "threshold" settings? */
       cccr_value.bits.complement = 0;  
       /* FIXME: What do we do with "threshold" settings? */
       cccr_value.bits.threshold = 0;   
       /* FIXME: What do we do with "threshold" settings? */
       cccr_value.bits.force_ovf = 0;   
       /* FIXME: Do we want to allow "forcing" overflow
        *        interrupts on all counter increments? */
       cccr_value.bits.ovf_pmi_t0 = 0;
       cccr_value.bits.ovf_pmi_t1 = 0;  
       /* PMI taken care of by kernel typically */
       cccr_value.bits.reserved2 = 0;
       cccr_value.bits.cascade = 0; 
       /* FIXME: How do we handle "cascading" counters? */
       cccr_value.bits.overflow = 0;

       /* these flags are always zero, from what I can tell... */
       bits->pebs_enable = 0;   /* flag for PEBS counting */
       bits->pebs_matrix_vert = 0;  
       /* flag for PEBS_MATRIX_VERT, whatever that is */

       /* ...unless the event is replay_event */
       if ( !strcmp( pentium4_events[event].name, "replay_event" ) ) {
      /* only the bits in P4_REPLAY_REAL_MASK go into the escr; the
         remaining unit masks select entries of p4_replay_regs[] */
      escr_value.bits.event_mask = event_mask & P4_REPLAY_REAL_MASK;
      num_masks = prepare_umask( umask, unit_masks );
      for ( i = 0; i < num_masks; i++ ) {
         replay_mask = unit_masks[i];
         /* only mask numbers 2..10 are used as p4_replay_regs[] indices */
         if ( replay_mask > 1 && replay_mask < 11 ) {
            /* process each valid mask we find */
        bits->pebs_enable |= p4_replay_regs[replay_mask].enb;
        bits->pebs_matrix_vert |= p4_replay_regs[replay_mask].mat_vert;
         }
      }
       }

       /* store the escr and cccr values */
       bits->event = escr_value.val;
       bits->cccr = cccr_value.val;
       bits->ireset = 0;     /* I don't really know what this does */
       SUBDBG( "escr: 0x%lx; cccr:  0x%lx\n", escr_value.val, cccr_value.val );
    } else {

       /* Non-P4 path: libpfm supplies the usable-counter mask and the
          event code; the converted unit mask is placed above the low
          8 bits of the command word. */
       int ret, code;

       if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK )
      return PAPI_ENOEVNT;

       if ( ( ret = _pfm_get_counter_info( event, &bits->selector,
                          &code ) ) != PAPI_OK )
      return ret;

       bits->counter_cmd=(int) (code | ((_pfm_convert_umask(event,umask))<< 8) );

       SUBDBG( "selector: 0x%x\n", bits->selector );
       SUBDBG( "event: 0x%x; umask: 0x%x; code: 0x%x; cmd: 0x%x\n", event,
           umask, code, ( ( hwd_register_t * ) bits )->counter_cmd );
    }

    return PAPI_OK;
}

Here is the call graph for this function:

static int _papi_p4_hwd_fixup_fp ( int  cidx) [static]

Definition at line 106 of file perfctr-x86.c.

{
    /* Composes the Pentium 4 floating point preset table name from the
       PAPI_PENTIUM4_FP environment variable and loads it. Returns
       PAPI_OK, or PAPI_EINVAL if the table cannot be loaded. */
    static const char *const fp_suffix[] = { " X87", " SSE_SP", " SSE_DP" };
    char table_name[PAPI_MIN_STR_LEN] = "Intel Pentium4 FPU";
    char *requested = getenv( "PAPI_PENTIUM4_FP" );
    unsigned int k;

    if ( ( requested != NULL ) && ( strlen( requested ) != 0 ) ) {
        /* Append, in this fixed order, every keyword that occurs
           anywhere in the variable. The keyword is the suffix without
           its leading blank. */
        for ( k = 0; k < sizeof ( fp_suffix ) / sizeof ( fp_suffix[0] ); k++ ) {
            if ( strstr( requested, fp_suffix[k] + 1 ) )
                strcat( table_name, fp_suffix[k] );
        }
    } else {
        /* variable unset or empty: take the default combination */
        strcat( table_name, P4_FPU );
    }

    if ( ( _papi_load_preset_table( table_name, 0, cidx ) ) == PAPI_OK )
        return PAPI_OK;

    PAPIERROR( "Improper usage of PAPI_PENTIUM4_FP environment "
                   "variable.\nUse one or two of X87,SSE_SP,SSE_DP" );
    return PAPI_EINVAL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int _perfctr_ctl ( hwd_context_t *  ctx,
int  code,
_papi_int_option_t *  option 
)

Definition at line 290 of file perfctr.c.

{
    /* Component control entry point: dispatches on `code` and acts on
     * the fields of *option that belong to that code.
     *
     * ctx    - unused
     * code   - one of the PAPI_* option codes handled below
     * option - option payload; some cases write a rounded ns value back
     *
     * Returns the result of the delegated call, PAPI_OK, PAPI_EINVAL for
     * an itimer/signal mismatch, PAPI_ECMP for granularity requests, or
     * PAPI_ENOSUPP for any other code.
     */
    ( void ) ctx;            /*unused */
    switch ( code ) {
    case PAPI_DOMAIN:
    case PAPI_DEFDOM:
        /* PPC64 passes the EventSet itself to set_domain; every other
           build passes the EventSet's control state */
#if defined(PPC64)
        return ( _perfctr_vector.
                 set_domain( option->domain.ESI, option->domain.domain ) );
#else
        return ( _perfctr_vector.
                 set_domain( option->domain.ESI->ctl_state,
                             option->domain.domain ) );
#endif
    case PAPI_GRANUL:
    case PAPI_DEFGRN:
        /* granularity changes are rejected */
        return PAPI_ECMP;
    case PAPI_ATTACH:
        return ( attach( option->attach.ESI->ctl_state, option->attach.tid ) );
    case PAPI_DETACH:
        return ( detach( option->attach.ESI->ctl_state ) );
    case PAPI_DEF_ITIMER:
    {
        /* flags are currently ignored, eventually the flags will be able
           to specify whether or not we use POSIX itimers (clock_gettimer) */
        /* each interval timer must be paired with its matching signal */
        if ( ( option->itimer.itimer_num == ITIMER_REAL ) &&
             ( option->itimer.itimer_sig != SIGALRM ) )
            return PAPI_EINVAL;
        if ( ( option->itimer.itimer_num == ITIMER_VIRTUAL ) &&
             ( option->itimer.itimer_sig != SIGVTALRM ) )
            return PAPI_EINVAL;
        if ( ( option->itimer.itimer_num == ITIMER_PROF ) &&
             ( option->itimer.itimer_sig != SIGPROF ) )
            return PAPI_EINVAL;
        /* a positive interval is replaced by its rounded value */
        if ( option->itimer.ns > 0 )
            option->itimer.ns = round_requested_ns( option->itimer.ns );
        /* At this point, we assume the user knows what he or
           she is doing, they maybe doing something arch specific */
        return PAPI_OK;
    }
    case PAPI_DEF_MPX_NS:
    {
        /* the value is narrowed to int for rounding, then stored back */
        option->multiplex.ns =
            ( unsigned long ) round_requested_ns( ( int ) option->multiplex.
                                                  ns );
        return ( PAPI_OK );
    }
    case PAPI_DEF_ITIMER_NS:
    {
        option->itimer.ns = round_requested_ns( option->itimer.ns );
        return ( PAPI_OK );
    }
    default:
        return ( PAPI_ENOSUPP );
    }
}

Here is the call graph for this function:

void _perfctr_dispatch_timer ( int  signal,
hwd_siginfo_t *  si,
void *  context 
)

int _perfctr_init_component ( int  cidx)

Definition at line 108 of file perfctr.c.

{
    /* One-time initialisation of the perfctr component.
     *
     * Queries the perfctr kernel driver for its capabilities, lets the OS
     * layer fill in system and memory information, populates
     * _perfctr_vector.cmp_info and the hardware info in
     * _papi_hwi_system_info, and finally sets up the preset events.
     *
     * The body uses `cidx` as the component index stored in cmp_info.CmpIdx.
     *
     * Returns PAPI_OK on success; PAPI_ESYS (with cmp_info.disabled_reason
     * set) when the driver cannot be opened or queried; otherwise the
     * error returned by the failing setup step.
     */
    int retval;
    struct perfctr_info info;
    char abiv[PAPI_MIN_STR_LEN];

#if defined(PERFCTR26)
    int fd;
#else
    struct vperfctr *dev;
#endif

#if defined(PERFCTR26)
    /* Get info from the kernel */
    /* Use lower level calls per Mikael to get the perfctr info
       without actually creating a new kernel-side state.
       Also, close the fd immediately after retrieving the info.
       This is much lighter weight and doesn't reserve the counter
       resources. Also compatible with perfctr 2.6.14.
     */
    fd = _vperfctr_open( 0 );
    if ( fd < 0 ) {
       strncpy(_perfctr_vector.cmp_info.disabled_reason,
          VOPEN_ERROR,PAPI_MAX_STR_LEN);
       return PAPI_ESYS;
    }
    retval = perfctr_info( fd, &info );
    close( fd );
    if ( retval < 0 ) {
       strncpy(_perfctr_vector.cmp_info.disabled_reason,
          VINFO_ERROR,PAPI_MAX_STR_LEN);
       return PAPI_ESYS;
    }

    /* copy tsc multiplier to local variable        */
    /* this field appears in perfctr 2.6 and higher */
    tb_scale_factor = ( long long ) info.tsc_to_cpu_mult;
#else
    /* Opened once for all threads. */
    if ( ( dev = vperfctr_open(  ) ) == NULL ) {
       strncpy(_perfctr_vector.cmp_info.disabled_reason,
          VOPEN_ERROR,PAPI_MAX_STR_LEN);
       return PAPI_ESYS;
    }
    SUBDBG( "_perfctr_init_component vperfctr_open = %p\n", dev );

    /* Get info from the kernel */
    retval = vperfctr_info( dev, &info );
    if ( retval < 0 ) {
       strncpy(_perfctr_vector.cmp_info.disabled_reason,
          VINFO_ERROR,PAPI_MAX_STR_LEN);
       /* release the handle on the failure path too, so it is not leaked */
       vperfctr_close( dev );
       return ( PAPI_ESYS );
    }
    vperfctr_close( dev );
#endif

    /* Fill in what we can of the papi_system_info. */
    retval = _papi_os_vector.get_system_info( &_papi_hwi_system_info );
    if ( retval != PAPI_OK )
        return ( retval );

    /* Setup memory info */
    retval = _papi_os_vector.get_memory_info( &_papi_hwi_system_info.hw_info,
                           ( int ) info.cpu_type );
    if ( retval )
        return ( retval );

    /* Describe the component: identification strings first ... */
    strcpy( _perfctr_vector.cmp_info.name,"perfctr.c" );
    strcpy( _perfctr_vector.cmp_info.version, "$Revision$" );
    sprintf( abiv, "0x%08X", info.abi_version );
    strcpy( _perfctr_vector.cmp_info.support_version, abiv );
    strcpy( _perfctr_vector.cmp_info.kernel_version, info.driver_version );
    _perfctr_vector.cmp_info.CmpIdx = cidx;

    /* ... then counter counts and capabilities reported by the driver */
    _perfctr_vector.cmp_info.num_cntrs = ( int ) PERFCTR_CPU_NRCTRS( &info );
    _perfctr_vector.cmp_info.num_mpx_cntrs=_perfctr_vector.cmp_info.num_cntrs;
    if ( info.cpu_features & PERFCTR_FEATURE_RDPMC )
        _perfctr_vector.cmp_info.fast_counter_read = 1;
    else
        _perfctr_vector.cmp_info.fast_counter_read = 0;
    _perfctr_vector.cmp_info.fast_real_timer = 1;
    _perfctr_vector.cmp_info.fast_virtual_timer = 1;
    _perfctr_vector.cmp_info.attach = 1;
    _perfctr_vector.cmp_info.attach_must_ptrace = 1;
    _perfctr_vector.cmp_info.default_domain = PAPI_DOM_USER;
#if !defined(PPC64)
    /* AMD and Intel ia386 processors all support unit mask bits */
    _perfctr_vector.cmp_info.cntr_umasks = 1;
#endif
#if defined(PPC64)
    _perfctr_vector.cmp_info.available_domains =
        PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR;
#else
    _perfctr_vector.cmp_info.available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL;
#endif
    _perfctr_vector.cmp_info.default_granularity = PAPI_GRN_THR;
    _perfctr_vector.cmp_info.available_granularities = PAPI_GRN_THR;
    if ( info.cpu_features & PERFCTR_FEATURE_PCINT )
        _perfctr_vector.cmp_info.hardware_intr = 1;
    else
        _perfctr_vector.cmp_info.hardware_intr = 0;
    SUBDBG( "Hardware/OS %s support counter generated interrupts\n",
            _perfctr_vector.cmp_info.hardware_intr ? "does" : "does not" );

    /* CPU model and vendor as seen by the driver */
    strcpy( _papi_hwi_system_info.hw_info.model_string,
            PERFCTR_CPU_NAME( &info ) );
    _papi_hwi_system_info.hw_info.model = ( int ) info.cpu_type;
#if defined(PPC64)
    _papi_hwi_system_info.hw_info.vendor = PAPI_VENDOR_IBM;
    if ( strlen( _papi_hwi_system_info.hw_info.vendor_string ) == 0 )
        strcpy( _papi_hwi_system_info.hw_info.vendor_string, "IBM" );
#else
    _papi_hwi_system_info.hw_info.vendor =
        xlate_cpu_type_to_vendor( info.cpu_type );
#endif

    /* Setup presets last. Some platforms depend on earlier info */
#if !defined(PPC64)
//     retval = setup_p3_vector_table(vtable);
    /* NOTE(review): setup_x86_presets is listed elsewhere on this page
       with a second `cidx` parameter, but is called here with one
       argument - confirm which prototype this file is built against. */
    if ( !retval )
        retval = setup_x86_presets( ( int ) info.cpu_type );
#else
    /* Setup native and preset events */
//  retval = ppc64_setup_vector_table(vtable);
    if ( !retval )
        retval = perfctr_ppc64_setup_native_table(  );
    if ( !retval )
        retval = setup_ppc64_presets( info.cpu_type );
#endif
    if ( retval )
        return ( retval );

    return ( PAPI_OK );
}

Here is the call graph for this function:

int _perfctr_init_thread ( hwd_context_t *  ctx)

Definition at line 381 of file perfctr.c.

{
    /* Per-thread initialisation: opens this thread's vperfctr handle
     * into ctx->perfctr and starts it with only the TSC enabled.
     *
     * Returns PAPI_OK on success, PAPI_ESYS if the handle cannot be
     * opened or vperfctr_control() fails.
     */
    struct vperfctr_control tmp;
    int error;

    /* Initialize our thread/process pointer. */
    if ( ( ctx->perfctr = vperfctr_open(  ) ) == NULL ) {
#ifdef VPERFCTR_OPEN_CREAT_EXCL
        /* New versions of perfctr have this, which allows us to
           get a previously created context, i.e. one created after
           a fork and now we're inside a new process that has been exec'd */
        /* retry with mode 0 only when the failed open left errno set */
        if ( errno ) {
            if ( ( ctx->perfctr = vperfctr_open_mode( 0 ) ) == NULL ) {
               return PAPI_ESYS;
            }
        } else {
            return PAPI_ESYS;
        }
#else
        return PAPI_ESYS;
#endif
    }
    SUBDBG( "_papi_hwd_init vperfctr_open() = %p\n", ctx->perfctr );

    /* Initialize the per thread/process virtualized TSC */
    /* everything in the control block is zero except tsc_on */
    memset( &tmp, 0x0, sizeof ( tmp ) );
    tmp.cpu_control.tsc_on = 1;

#ifdef VPERFCTR_CONTROL_CLOEXEC
    tmp.flags = VPERFCTR_CONTROL_CLOEXEC;
    SUBDBG( "close on exec\t\t\t%u\n", tmp.flags );
#endif

    /* Start the per thread/process virtualized TSC */
    error = vperfctr_control( ctx->perfctr, &tmp );
    if ( error < 0 ) {
        /* NOTE(review): ctx->perfctr stays open on this path - confirm
           that the caller still runs the thread shutdown afterwards */
        SUBDBG( "starting virtualized TSC; vperfctr_control returns %d\n",
                error );
        return PAPI_ESYS;
    }

    return PAPI_OK;
}

int _perfctr_shutdown_thread ( hwd_context_t *  ctx)

Definition at line 429 of file perfctr.c.

{
    /* Per-thread teardown: unlinks and closes ctx->perfctr, then zeroes
     * the whole context. Always returns PAPI_OK; the result of
     * vperfctr_unlink() is only logged (DEBUG builds) and never
     * propagated to the caller.
     */
#ifdef DEBUG
    int retval = vperfctr_unlink( ctx->perfctr );
    SUBDBG( "_papi_hwd_shutdown vperfctr_unlink(%p) = %d\n", ctx->perfctr,
            retval );
#else
    vperfctr_unlink( ctx->perfctr );
#endif
    vperfctr_close( ctx->perfctr );
    SUBDBG( "_perfctr_shutdown vperfctr_close(%p)\n", ctx->perfctr );
    /* wipe the context so the stale handle value cannot be reused */
    memset( ctx, 0x0, sizeof ( hwd_context_t ) );
    return ( PAPI_OK );
}
static int _pfm_get_counter_info ( unsigned int  event,
unsigned int *  selector,
int *  code 
) [static]

Definition at line 1160 of file perfctr-x86.c.

{
    /* Asks libpfm which counters can host `event`.
     *
     * event    - libpfm event index
     * selector - out: bitmask with bit i set for every register index i
     *            that pfm_get_event_counters() reports for the event,
     *            within the range scanned below
     * code     - out: event code obtained from
     *            pfm_get_event_code_counter() for the FIRST such counter;
     *            left untouched when no counter is found
     *
     * Returns PAPI_OK, or PAPI_ESYS when any libpfm call fails.
     */
    pfmlib_regmask_t cnt, impl;
    unsigned int num;
    unsigned int i, first = 1;
    int ret;

    if ( ( ret = pfm_get_event_counters( event, &cnt ) ) != PFMLIB_SUCCESS ) {
        PAPIERROR( "pfm_get_event_counters(%u,%p): %s", event,
                   ( void * ) &cnt, pfm_strerror( ret ) );
        return PAPI_ESYS;
    }
    if ( ( ret = pfm_get_num_counters( &num ) ) != PFMLIB_SUCCESS ) {
        /* report the address that was passed, not the (unset) value */
        PAPIERROR( "pfm_get_num_counters(%p): %s", ( void * ) &num,
                   pfm_strerror( ret ) );
        return PAPI_ESYS;
    }
    if ( ( ret = pfm_get_impl_counters( &impl ) ) != PFMLIB_SUCCESS ) {
        PAPIERROR( "pfm_get_impl_counters(%p): %s", ( void * ) &impl,
                   pfm_strerror( ret ) );
        return PAPI_ESYS;
    }

    *selector = 0;
    /* Walk register indices upward until `num` implemented counters have
       been seen; indices that are not implemented do not count down. */
    for ( i = 0; num; i++ ) {
        if ( pfm_regmask_isset( &impl, i ) )
            num--;
        if ( pfm_regmask_isset( &cnt, i ) ) {
            if ( first ) {
                /* the event code is fetched once, for the first counter */
                if ( ( ret =
                       pfm_get_event_code_counter( event, i,
                                                   code ) ) !=
                     PFMLIB_SUCCESS ) {
                    PAPIERROR( "pfm_get_event_code_counter(%u, %u, %p): %s",
                           event, i, ( void * ) code, pfm_strerror( ret ) );
                    return PAPI_ESYS;
                }
                first = 0;
            }
            /* unsigned shift: the mask is an unsigned int and a signed
               shift into the top bit would be undefined */
            *selector |= 1u << i;
        }
    }
    return PAPI_OK;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int _x86_allocate_registers ( EventSetInfo_t *  ESI) [static]

Definition at line 608 of file perfctr-x86.c.

{
    /* Assigns a hardware counter to every native event in the EventSet.
     *
     * For each native event a hwd_reg_alloc_t entry is built (usable
     * counter mask, rank, and on Pentium 4 the escr pair) and the set is
     * handed to _papi_bipartite_alloc(). On success the resulting
     * register description is copied into each event's ni_bits.
     *
     * Returns PAPI_OK when a mapping was found, PAPI_ECNFLCT otherwise.
     */
    int i, j, natNum;
    hwd_reg_alloc_t event_list[MAX_COUNTERS];
    hwd_register_t *ptr;

    /* Initialize the local structure needed
       for counter allocation and optimization. */
    /* NOTE(review): event_list has MAX_COUNTERS entries and natNum is
       not checked against it here - presumably bounded by the caller */
    natNum = ESI->NativeCount;

    if ( is_pentium4() ) {
        SUBDBG( "native event count: %d\n", natNum );
    }

    for ( i = 0; i < natNum; i++ ) {
        /* retrieve the mapping information about this native event */
        /* NOTE(review): the result of this call is not checked, so
           ra_bits is used even if the lookup did not fill it in */
        _papi_libpfm_ntv_code_to_bits( ( unsigned int ) ESI->NativeInfoArray[i].
                               ni_event, &event_list[i].ra_bits );

        if ( is_pentium4() ) {
            /* combine counter bit masks for both esc registers into selector */
            event_list[i].ra_selector =
                event_list[i].ra_bits.counter[0] | event_list[i].ra_bits.
                counter[1];
        } else {
            /* make sure register allocator only looks at legal registers */
            event_list[i].ra_selector =
                event_list[i].ra_bits.selector & ALLCNTRS;
#ifdef PERFCTR_X86_INTEL_CORE2
            /* Core2: selector bits from 16 upward are folded down to
               start at bit 2 of the allocator's mask */
            if ( _papi_hwi_system_info.hw_info.model ==
                 PERFCTR_X86_INTEL_CORE2 )
                event_list[i].ra_selector |=
                    ( ( event_list[i].ra_bits.
                        selector >> 16 ) << 2 ) & ALLCNTRS;
#endif
        }
        /* calculate native event rank, which is no. of counters it can live on */
        event_list[i].ra_rank = 0;
        for ( j = 0; j < MAX_COUNTERS; j++ ) {
            if ( event_list[i].ra_selector & ( 1 << j ) ) {
                event_list[i].ra_rank++;
            }
        }

        if ( is_pentium4() ) {
            event_list[i].ra_escr[0] = event_list[i].ra_bits.escr[0];
            event_list[i].ra_escr[1] = event_list[i].ra_bits.escr[1];
#ifdef DEBUG
            SUBDBG( "i: %d\n", i );
            print_alloc( &event_list[i] );
#endif
        }
    }
    if ( _papi_bipartite_alloc( event_list, natNum, ESI->CmpIdx ) ) {   /* successfully mapped */
        for ( i = 0; i < natNum; i++ ) {
#ifdef PERFCTR_X86_INTEL_CORE2
            /* Core2: store the allocator's final selector back into the
               register description before it is copied out */
            if ( _papi_hwi_system_info.hw_info.model ==
                 PERFCTR_X86_INTEL_CORE2 )
                event_list[i].ra_bits.selector = event_list[i].ra_selector;
#endif
#ifdef DEBUG
            if ( is_pentium4() ) {
                SUBDBG( "i: %d\n", i );
                print_alloc( &event_list[i] );
            }
#endif
            /* Copy all info about this native event to the NativeInfo struct */
            ptr = ESI->NativeInfoArray[i].ni_bits;
            *ptr = event_list[i].ra_bits;

            if ( is_pentium4() ) {
                /* The selector contains the counter bit position. Turn it into a number
                   and store it in the first counter value, zeroing the second. */
                ptr->counter[0] = ffs( event_list[i].ra_selector ) - 1;
                ptr->counter[1] = 0;
            }

            /* Array order on perfctr is event ADD order, not counter #... */
            ESI->NativeInfoArray[i].ni_position = i;
        }
        return PAPI_OK;
    } else
        return PAPI_ECNFLCT;
}

Here is the call graph for this function:

static int _x86_init_control_state ( hwd_control_state_t *  ptr) [static]

Definition at line 309 of file perfctr-x86.c.

{
    /* Puts a control state into its default configuration.
     *
     * The component's default domain is translated into user/kernel mode
     * bits, which are OR-ed into the per-counter event select words
     * (evntsel_aux on Pentium 4, evntsel otherwise). The TSC is always
     * switched on. Always returns PAPI_OK.
     */
    int i, def_mode = 0;

    if ( is_pentium4() ) {
        if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_USER )
            def_mode |= ESCR_T0_USR;
        if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_KERNEL )
            def_mode |= ESCR_T0_OS;

        for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
            ptr->control.cpu_control.evntsel_aux[i] |= def_mode;
        }
        ptr->control.cpu_control.tsc_on = 1;
        /* start with no counters programmed */
        ptr->control.cpu_control.nractrs = 0;
        ptr->control.cpu_control.nrictrs = 0;

#ifdef VPERFCTR_CONTROL_CLOEXEC
        ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC;
        SUBDBG( "close on exec\t\t\t%u\n", ptr->control.flags );
#endif
    } else {

        if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_USER )
            def_mode |= PERF_USR;
        if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_KERNEL )
            def_mode |= PERF_OS;

        ptr->allocated_registers.selector = 0;
        /* Two families of models: the first group sets PERF_ENABLE on
           evntsel[0] only, the second sets it on every counter. A model
           matching neither group gets no evntsel/pmc_map setup here (the
           switch has no default). */
        switch ( _papi_hwi_system_info.hw_info.model ) {
        case PERFCTR_X86_GENERIC:
        case PERFCTR_X86_WINCHIP_C6:
        case PERFCTR_X86_WINCHIP_2:
        case PERFCTR_X86_VIA_C3:
        case PERFCTR_X86_INTEL_P5:
        case PERFCTR_X86_INTEL_P5MMX:
        case PERFCTR_X86_INTEL_PII:
        case PERFCTR_X86_INTEL_P6:
        case PERFCTR_X86_INTEL_PIII:
#ifdef PERFCTR_X86_INTEL_CORE
        case PERFCTR_X86_INTEL_CORE:
#endif
#ifdef PERFCTR_X86_INTEL_PENTM
        case PERFCTR_X86_INTEL_PENTM:
#endif
            ptr->control.cpu_control.evntsel[0] |= PERF_ENABLE;
            for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
                ptr->control.cpu_control.evntsel[i] |= def_mode;
                /* identity mapping: slot i uses counter i */
                ptr->control.cpu_control.pmc_map[i] = ( unsigned int ) i;
            }
            break;
#ifdef PERFCTR_X86_INTEL_CORE2
        case PERFCTR_X86_INTEL_CORE2:
#endif
#ifdef PERFCTR_X86_INTEL_ATOM
        case PERFCTR_X86_INTEL_ATOM:
#endif
#ifdef PERFCTR_X86_INTEL_NHLM
        case PERFCTR_X86_INTEL_NHLM:
#endif
#ifdef PERFCTR_X86_INTEL_WSTMR
        case PERFCTR_X86_INTEL_WSTMR:
#endif
#ifdef PERFCTR_X86_AMD_K8
        case PERFCTR_X86_AMD_K8:
#endif
#ifdef PERFCTR_X86_AMD_K8C
        case PERFCTR_X86_AMD_K8C:
#endif
#ifdef PERFCTR_X86_AMD_FAM10H   /* this is defined in perfctr 2.6.29 */
        case PERFCTR_X86_AMD_FAM10H:
#endif
        case PERFCTR_X86_AMD_K7:
            for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
                ptr->control.cpu_control.evntsel[i] |= PERF_ENABLE | def_mode;
                ptr->control.cpu_control.pmc_map[i] = ( unsigned int ) i;
            }
            break;
        }
#ifdef VPERFCTR_CONTROL_CLOEXEC
        ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC;
        SUBDBG( "close on exec\t\t\t%u\n", ptr->control.flags );
#endif

        /* Make sure the TSC is always on */
        ptr->control.cpu_control.tsc_on = 1;
    }
    return ( PAPI_OK );
}

Here is the call graph for this function:

static int _x86_read ( hwd_context_t *  ctx,
hwd_control_state_t *  spc,
long long **  dp,
int  flags 
) [static]

Definition at line 891 of file perfctr-x86.c.

{
    /* Read the current counter values into spc->state and hand the caller a
       pointer to the pmc array through *dp.
       - With PAPI_PAUSED set in flags, the saved state is fetched with
         vperfctr_read_state().
       - Otherwise the counters are read from spc->rvperfctr when that handle
         is non-NULL, else from ctx->perfctr.
       Always returns PAPI_OK; the return values of the read calls are not
       inspected here. */
    if ( flags & PAPI_PAUSED ) {
        vperfctr_read_state( ctx->perfctr, &spc->state, NULL );
        if ( !is_pentium4() ) {
            unsigned int i;
            for ( i = 0;
                  i <
                  spc->control.cpu_control.nractrs +
                  spc->control.cpu_control.nrictrs; i++ ) {
                /* Cast so the argument matches %lld, as the other debug
                   prints in this function already do. */
                SUBDBG( "vperfctr_read_state: counter %u =  %lld\n", i,
                        ( long long ) spc->state.pmc[i] );
            }
        }
    } else {
        SUBDBG( "vperfctr_read_ctrs\n" );
        if ( spc->rvperfctr != NULL ) {
            rvperfctr_read_ctrs( spc->rvperfctr, &spc->state );
        } else {
            vperfctr_read_ctrs( ctx->perfctr, &spc->state );
        }
    }
    *dp = ( long long * ) spc->state.pmc;
#ifdef DEBUG
    {
        if ( ISLEVEL( DEBUG_SUBSTRATE ) ) {
            unsigned int i;
            if ( is_pentium4() ) {
                /* Pentium 4: only the accumulating counters are dumped. */
                for ( i = 0; i < spc->control.cpu_control.nractrs; i++ ) {
                    SUBDBG( "raw val hardware index %u is %lld\n", i,
                            ( long long ) spc->state.pmc[i] );
                }
            } else {
                for ( i = 0;
                      i <
                      spc->control.cpu_control.nractrs +
                      spc->control.cpu_control.nrictrs; i++ ) {
                    SUBDBG( "raw val hardware index %u is %lld\n", i,
                            ( long long ) spc->state.pmc[i] );
                }
            }
        }
    }
#endif
    return ( PAPI_OK );
}

Here is the call graph for this function:

static int _x86_reset ( hwd_context_t *  ctx,
hwd_control_state_t *  cntrl 
) [static]

Definition at line 940 of file perfctr-x86.c.

{
    /* A reset is carried out by re-issuing the start request for the same
       control state; the result of _x86_start() is passed straight back. */
    int status = _x86_start( ctx, cntrl );

    return status;
}

Here is the call graph for this function:

int _x86_set_domain ( hwd_control_state_t *  cntrl,
int  domain 
)

Definition at line 400 of file perfctr-x86.c.

{
    /* Replace the counting domain of every counter in this control state.
       The user/kernel bits are cleared first and then set again according
       to `domain`; the enable bit is left untouched.
       Returns PAPI_OK if `domain` contains PAPI_DOM_USER and/or
       PAPI_DOM_KERNEL, PAPI_EINVAL otherwise (the old bits are still
       cleared in that case). */
    const int num_cntrs = _perfctr_vector.cmp_info.num_cntrs;
    const int want_user = ( domain & PAPI_DOM_USER ) != 0;
    const int want_kernel = ( domain & PAPI_DOM_KERNEL ) != 0;
    int n;

    if ( is_pentium4() ) {
        /* Pentium 4 keeps the domain bits in evntsel_aux. */
        for ( n = 0; n < num_cntrs; n++ ) {
            cntrl->control.cpu_control.evntsel_aux[n] &=
                ~( ESCR_T0_OS | ESCR_T0_USR );
            if ( want_user )
                cntrl->control.cpu_control.evntsel_aux[n] |= ESCR_T0_USR;
            if ( want_kernel )
                cntrl->control.cpu_control.evntsel_aux[n] |= ESCR_T0_OS;
        }
    } else {
        /* All other CPUs keep them in evntsel. */
        for ( n = 0; n < num_cntrs; n++ ) {
            cntrl->control.cpu_control.evntsel[n] &= ~( PERF_OS | PERF_USR );
            if ( want_user )
                cntrl->control.cpu_control.evntsel[n] |= PERF_USR;
            if ( want_kernel )
                cntrl->control.cpu_control.evntsel[n] |= PERF_OS;
        }
    }

    if ( want_user || want_kernel )
        return ( PAPI_OK );

    return ( PAPI_EINVAL );
}

Here is the call graph for this function:

static int _x86_set_overflow ( EventSetInfo_t *  ESI,
int  EventIndex,
int  threshold 
) [static]

Definition at line 995 of file perfctr-x86.c.

{
    /* Enable (threshold != 0) or disable (threshold == 0) hardware overflow
       for the event at EventIndex in the event set ESI.
       The control arrays hold the accumulating counters (nractrs of them)
       first and the interrupting counters (nrictrs of them) after; this
       routine adjusts both counts and swaps the event into the proper part.
       Returns PAPI_EINVAL if the event's counter position is >= the number
       of counters, otherwise the result of starting/stopping the signal. */
       hwd_control_state_t *ctl = ( hwd_control_state_t * ) ( ESI->ctl_state );
       struct hwd_pmc_control *contr = &(ctl->control);
    int i, ncntrs, nricntrs = 0, nracntrs = 0, retval = 0;
    OVFDBG( "EventIndex=%d\n", EventIndex );

#ifdef DEBUG
    if ( is_pentium4() )
      print_control( &(contr->cpu_control) );
#endif

    /* The correct event to overflow is EventIndex */
    ncntrs = _perfctr_vector.cmp_info.num_cntrs;
    /* i is the position of the event's first native counter. */
    /* NOTE(review): only the upper bound is checked below; a negative
       pos[0] would pass this test -- confirm callers never pass one. */
    i = ESI->EventInfoArray[EventIndex].pos[0];

    if ( i >= ncntrs ) {
        PAPIERROR( "Selector id %d is larger than ncntrs %d", i, ncntrs );
        return PAPI_EINVAL;
    }

    if ( threshold != 0 ) {  /* Set an overflow threshold */
        retval = _papi_hwi_start_signal( _perfctr_vector.cmp_info.hardware_intr_sig,
                                         NEED_CONTEXT,
                                         _perfctr_vector.cmp_info.CmpIdx );
        if ( retval != PAPI_OK )
            return ( retval );

        /* overflow interrupt occurs on the NEXT event after overflow occurs
           thus we subtract 1 from the threshold. */
        contr->cpu_control.ireset[i] = ( -threshold + 1 );

        /* Set the CPU-specific "interrupt on overflow" bit in evntsel. */
        if ( is_pentium4() )
            contr->cpu_control.evntsel[i] |= CCCR_OVF_PMI_T0;
        else
            contr->cpu_control.evntsel[i] |= PERF_INT_ENABLE;

        /* One counter changes from accumulating to interrupting. */
        /* NOTE(review): assumes nractrs > 0 and that overflow is not
           already enabled on this event -- confirm with callers. */
        contr->cpu_control.nrictrs++;
        contr->cpu_control.nractrs--;
        nricntrs = ( int ) contr->cpu_control.nrictrs;
        nracntrs = ( int ) contr->cpu_control.nractrs;
        contr->si_signo = _perfctr_vector.cmp_info.hardware_intr_sig;

        /* move this event to the bottom part of the list if needed */
        if ( i < nracntrs )
            swap_events( ESI, contr, i, nracntrs );
        OVFDBG( "Modified event set\n" );
    } else {
      /* Only undo the bookkeeping if the overflow bit is actually set. */
      if ( is_pentium4() && contr->cpu_control.evntsel[i] & CCCR_OVF_PMI_T0 ) {
            contr->cpu_control.ireset[i] = 0;
            contr->cpu_control.evntsel[i] &= ( ~CCCR_OVF_PMI_T0 );
            contr->cpu_control.nrictrs--;
            contr->cpu_control.nractrs++;
      } else if ( !is_pentium4() &&
                    contr->cpu_control.evntsel[i] & PERF_INT_ENABLE ) {
            contr->cpu_control.ireset[i] = 0;
            contr->cpu_control.evntsel[i] &= ( ~PERF_INT_ENABLE );
            contr->cpu_control.nrictrs--;
            contr->cpu_control.nractrs++;
        }

        nricntrs = ( int ) contr->cpu_control.nrictrs;
        nracntrs = ( int ) contr->cpu_control.nractrs;

        /* move this event to the top part of the list if needed */
        if ( i >= nracntrs )
            swap_events( ESI, contr, i, nracntrs - 1 );

        /* No interrupting counters remain: no signal number is needed. */
        if ( !nricntrs )
            contr->si_signo = 0;

        OVFDBG( "Modified event set\n" );

        retval = _papi_hwi_stop_signal( _perfctr_vector.cmp_info.hardware_intr_sig );
    }

#ifdef DEBUG
    if ( is_pentium4() )
      print_control( &(contr->cpu_control) );
#endif
    OVFDBG( "End of call. Exit code: %d\n", retval );
    return ( retval );
}

Here is the call graph for this function:

static int _x86_start ( hwd_context_t *  ctx,
hwd_control_state_t *  state 
) [static]

Definition at line 843 of file perfctr-x86.c.

{
    /* Program the counters described by state->control.
       The request goes to state->rvperfctr when that handle is non-NULL,
       otherwise to ctx->perfctr.
       Returns PAPI_OK on success, PAPI_ESYS if the control call reports a
       negative result. */
    int rc;

#ifdef DEBUG
    print_control( &state->control.cpu_control );
#endif

    if ( state->rvperfctr == NULL ) {
        rc = vperfctr_control( ctx->perfctr, &state->control );
        if ( rc < 0 ) {
            SUBDBG( "vperfctr_control returns: %d\n", rc );
            PAPIERROR( VCNTRL_ERROR );
            return ( PAPI_ESYS );
        }
        return ( PAPI_OK );
    }

    rc = rvperfctr_control( state->rvperfctr, &state->control );
    if ( rc < 0 ) {
        SUBDBG( "rvperfctr_control returns: %d\n", rc );
        PAPIERROR( RCNTRL_ERROR );
        return ( PAPI_ESYS );
    }
    return ( PAPI_OK );
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int _x86_stop ( hwd_context_t *  ctx,
hwd_control_state_t *  state 
) [static]

Definition at line 869 of file perfctr-x86.c.

{
    /* Stop the counters of this control state.
       The request goes to state->rvperfctr when that handle is non-NULL,
       otherwise to ctx->perfctr.
       Returns PAPI_OK on success, PAPI_ESYS if the stop call reports a
       negative result. */
    int error;

    if ( state->rvperfctr != NULL ) {
        /* Stop the handle stored in the control state -- the same one that
           _x86_start() programs and _x86_read() reads.  Casting
           ctx->perfctr to a struct rvperfctr * would act on a different
           object and leave this handle running. */
        error = rvperfctr_stop( state->rvperfctr );
        if ( error < 0 ) {
            SUBDBG( "rvperfctr_stop returns: %d\n", error );
            PAPIERROR( RCNTRL_ERROR );
            return ( PAPI_ESYS );
        }
        return ( PAPI_OK );
    }

    error = vperfctr_stop( ctx->perfctr );
    if ( error < 0 ) {
        SUBDBG( "vperfctr_stop returns: %d\n", error );
        PAPIERROR( VCNTRL_ERROR );
        return ( PAPI_ESYS );
    }
    return ( PAPI_OK );
}

Here is the call graph for this function:

static int _x86_stop_profiling ( ThreadInfo_t master,
EventSetInfo_t ESI 
) [static]

Definition at line 1080 of file perfctr-x86.c.

{
    /* Nothing is done here: both arguments are deliberately ignored and
       PAPI_OK is always returned. */
    ( void ) ESI;
    ( void ) master;

    return PAPI_OK;
}
static int _x86_update_control_state ( hwd_control_state_t *  this_state,
NativeInfo_t *  native,
int  count,
hwd_context_t *  ctx 
) [static]

Definition at line 740 of file perfctr-x86.c.

{
    /* Rebuild the hardware control state from `count` native events.
       The previous events are cleared first, then one control entry is
       filled in per native event and nractrs is set to `count`.
       Returns PAPI_OK, or PAPI_ECNFLCT when two Pentium 4 events need
       different pebs_enable / pebs_matrix_vert values (the remaining events
       are still processed in that case). */
    ( void ) ctx;            /*unused */
    unsigned int i, k;
    /* Signed: it holds PAPI error codes, which are negative, and is
       returned as int. */
    int retval = PAPI_OK;
    hwd_register_t *bits,*bits2;
    struct perfctr_cpu_control *cpu_control = &this_state->control.cpu_control;

    /* clear out the events from the control state */
    clear_cs_events( this_state );

    if ( is_pentium4() ) {
        /* fill the counters we're using */
        for ( i = 0; i < ( unsigned int ) count; i++ ) {
            /* dereference the mapping information about this native event */
            bits = native[i].ni_bits;

            /* Add counter control command values to eventset */
            cpu_control->pmc_map[i] = bits->counter[0];
            cpu_control->evntsel[i] = bits->cccr;
            cpu_control->ireset[i] = bits->ireset;
            cpu_control->pmc_map[i] |= FAST_RDPMC;
            cpu_control->evntsel_aux[i] |= bits->event;

            /* pebs_enable and pebs_matrix_vert are shared registers used for replay_events.
               Replay_events count L1 and L2 cache events. There is only one of each for 
               the entire eventset. Therefore, there can be only one unique replay_event 
               per eventset. This means L1 and L2 can't be counted together. Which stinks.
               This conflict should be trapped in the allocation scheme, but we'll test for it
               here too, just in case. */
            if ( bits->pebs_enable ) {
                /* if pebs_enable isn't set, just copy */
                if ( cpu_control->p4.pebs_enable == 0 ) {
                    cpu_control->p4.pebs_enable = bits->pebs_enable;
                    /* if pebs_enable conflicts, flag an error */
                } else if ( cpu_control->p4.pebs_enable != bits->pebs_enable ) {
                    SUBDBG
                        ( "WARNING: P4_update_control_state -- pebs_enable conflict!" );
                    retval = PAPI_ECNFLCT;
                }
                /* if pebs_enable == bits->pebs_enable, do nothing */
            }
            if ( bits->pebs_matrix_vert ) {
                /* if pebs_matrix_vert isn't set, just copy */
                if ( cpu_control->p4.pebs_matrix_vert == 0 ) {
                    cpu_control->p4.pebs_matrix_vert = bits->pebs_matrix_vert;
                    /* if pebs_matrix_vert conflicts, flag an error */
                } else if ( cpu_control->p4.pebs_matrix_vert !=
                            bits->pebs_matrix_vert ) {
                    SUBDBG
                        ( "WARNING: P4_update_control_state -- pebs_matrix_vert conflict!" );
                    retval = PAPI_ECNFLCT;
                }
                /* if pebs_matrix_vert == bits->pebs_matrix_vert, do nothing */
            }
        }
        cpu_control->nractrs = ( unsigned int ) count;

        /* Make sure the TSC is always on */
        cpu_control->tsc_on = 1;

#ifdef DEBUG
        print_control( cpu_control );
#endif
    } else {
        switch ( _papi_hwi_system_info.hw_info.model ) {
#ifdef PERFCTR_X86_INTEL_CORE2
        case PERFCTR_X86_INTEL_CORE2:
            /* fill the counters we're using */
            for ( i = 0; i < ( unsigned int ) count; i++ ) {
                bits2 = native[i].ni_bits;
                /* k becomes the index of the lowest bit set in selector. */
                /* NOTE(review): if no bit is set, k ends up equal to
                   MAX_COUNTERS and is still used below -- confirm the
                   allocator always sets one. */
                for ( k = 0; k < MAX_COUNTERS; k++ )
                    if ( bits2->selector & ( 1 << k ) ) {
                        break;
                    }
                /* Selector bits 0 and 1 map to pmc_map values 0 and 1;
                   higher bits map to (k - 2) with bit 30 set. */
                if ( k > 1 )
                    cpu_control->pmc_map[i] = ( k - 2 ) | 0x40000000;
                else
                    cpu_control->pmc_map[i] = k;

                /* Add counter control command values to eventset */
                cpu_control->evntsel[i] |= bits2->counter_cmd;
            }
            break;
#endif
        default:
            /* fill the counters we're using */
            for ( i = 0; i < ( unsigned int ) count; i++ ) {
                /* Add counter control command values to eventset */
                bits2 = native[i].ni_bits;
                cpu_control->evntsel[i] |= bits2->counter_cmd;
            }
        }
        cpu_control->nractrs = ( unsigned int ) count;
    }
    return retval;
}

Here is the call graph for this function:

static void clear_cs_events ( hwd_control_state_t *  this_state) [static]

Definition at line 694 of file perfctr-x86.c.

{
    /* Remove every programmed event from the control state while keeping
       the per-counter domain bits (and, on non-Pentium 4 CPUs, the enable
       bit).  Both counter counts are reset to zero afterwards. */
    struct perfctr_cpu_control *cpu = &this_state->control.cpu_control;
    const int p4 = is_pentium4();
    /* entries in use = accumulating (nractrs) + interrupting (nrictrs) */
    const unsigned int in_use = cpu->nractrs + cpu->nrictrs;
    unsigned int slot;

    for ( slot = 0; slot < in_use; slot++ ) {
        SUBDBG( "Clearing pmc event entry %d\n", slot );
        if ( p4 ) {
            cpu->pmc_map[slot] = 0;
            cpu->evntsel[slot] = 0;
            /* keep only the user/kernel domain bits */
            cpu->evntsel_aux[slot] &= ( ESCR_T0_OS | ESCR_T0_USR );
        } else {
            cpu->pmc_map[slot] = slot;
            /* keep only the enable and user/kernel domain bits */
            cpu->evntsel[slot] &= ( PERF_ENABLE | PERF_OS | PERF_USR );
        }
        cpu->ireset[slot] = 0;
    }

    if ( p4 ) {
        /* the shared pebs registers are cleared as well */
        cpu->p4.pebs_enable = 0;
        cpu->p4.pebs_matrix_vert = 0;
    }

    /* no accumulating and no interrupting counters remain */
    cpu->nractrs = 0;
    cpu->nrictrs = 0;

#ifdef DEBUG
    if ( p4 )
        print_control( cpu );
#endif
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int is_pentium4 ( void  ) [inline, static]

Definition at line 75 of file perfctr-x86.c.

                                    {
  /* The CPU is treated as a Pentium 4 when the vendor is Intel and the
     cpuid family is 15.  Returns 1 in that case, 0 otherwise. */
  return ( ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_INTEL ) &&
           ( _papi_hwi_system_info.hw_info.cpuid_family == 15 ) ) ? 1 : 0;
}

Here is the caller graph for this function:

static void print_alloc ( X86_reg_alloc_t *  a) [static]

Definition at line 156 of file perfctr-x86.c.

{
    /* Debug helper: print the selector, the rank and the first two escr
       entries of the register allocation record `a` through SUBDBG. */
    SUBDBG( "X86_reg_alloc:\n" );
    SUBDBG( "  selector: 0x%x\n", a->ra_selector );
    SUBDBG( "  rank: 0x%x\n", a->ra_rank );
    SUBDBG( "  escr: 0x%x 0x%x\n", a->ra_escr[0], a->ra_escr[1] );
}

Here is the caller graph for this function:

void print_control ( const struct perfctr_cpu_control *  control)

Definition at line 165 of file perfctr-x86.c.

{
    /* Debug helper: print the TSC flag, both counter counts and, for each
       entry in use, its pmc_map and evntsel values through SUBDBG.
       ireset is printed only when it is non-zero. */
    const unsigned int in_use = control->nractrs + control->nrictrs;
    unsigned int n;

    SUBDBG( "Control used:\n" );
    SUBDBG( "tsc_on\t\t\t%u\n", control->tsc_on );
    SUBDBG( "nractrs\t\t\t%u\n", control->nractrs );
    SUBDBG( "nrictrs\t\t\t%u\n", control->nrictrs );

    for ( n = 0; n < in_use; ++n ) {
        /* values below 18 are printed in decimal, everything else in hex */
        if ( control->pmc_map[n] < 18 ) {
            SUBDBG( "pmc_map[%u]\t\t%u\n", n, control->pmc_map[n] );
        } else {
            SUBDBG( "pmc_map[%u]\t\t0x%08X\n", n, control->pmc_map[n] );
        }

        SUBDBG( "evntsel[%u]\t\t0x%08X\n", n, control->evntsel[n] );

        if ( control->ireset[n] != 0 ) {
            SUBDBG( "ireset[%u]\t%d\n", n, control->ireset[n] );
        }
    }
}
int setup_x86_presets ( int  cputype,
int  cidx 
)

Definition at line 190 of file perfctr-x86.c.

{
    /* Initialise libpfm for this component and load the preset event
       table(s) that belong to `cputype`.
       Returns PAPI_OK on success, the error code of _papi_libpfm_init() or
       of a Pentium 4 fix-up routine if one of them fails, and PAPI_ENOIMPL
       when the CPU type is unsupported or unknown. */
    int status;

    status = _papi_libpfm_init( &_perfctr_vector, cidx );
    if ( status != PAPI_OK )
        return status;

    if ( is_pentium4() ) {
        int model_known;

        /* baseline event map shared by all Pentium 4s */
        _papi_load_preset_table( "Intel Pentium4", 0, cidx );

        /* floating point and vector presets need a fix-up */
        status = _papi_p4_hwd_fixup_fp( cidx );
        if ( status != PAPI_OK )
            return ( status );
        status = _papi_hwd_fixup_vec( cidx );
        if ( status != PAPI_OK )
            return ( status );

        /* L3 cache events are installed only when 3 cache levels exist */
        if ( _papi_hwi_system_info.hw_info.mem_hierarchy.levels == 3 )
            _papi_load_preset_table( "Intel Pentium4 L3", 0, cidx );

        /* No model adds events beyond the base map; the model is only
           validated here. */
        model_known = ( cputype == PERFCTR_X86_INTEL_P4 ) ||
                      ( cputype == PERFCTR_X86_INTEL_P4M2 );
#ifdef PERFCTR_X86_INTEL_P4M3
        if ( cputype == PERFCTR_X86_INTEL_P4M3 )
            model_known = 1;
#endif
        if ( !model_known ) {
            PAPIERROR( MODEL_ERROR );
            return PAPI_ENOIMPL;
        }
        return status;
    }

    switch ( cputype ) {
    case PERFCTR_X86_GENERIC:
    case PERFCTR_X86_WINCHIP_C6:
    case PERFCTR_X86_WINCHIP_2:
    case PERFCTR_X86_VIA_C3:
    case PERFCTR_X86_INTEL_P5:
    case PERFCTR_X86_INTEL_P5MMX:
        SUBDBG( "This cpu is not supported by the perfctr-x86 component\n" );
        PAPIERROR( MODEL_ERROR );
        return PAPI_ENOIMPL;

    case PERFCTR_X86_INTEL_P6:
    case PERFCTR_X86_INTEL_PII:
        /* both use the P6 base events only */
        _papi_load_preset_table( "Intel P6", 0, cidx );
        break;

    case PERFCTR_X86_INTEL_PIII:
        _papi_load_preset_table( "Intel P6", 0, cidx );          /* base events */
        _papi_load_preset_table( "Intel PentiumIII", 0, cidx );  /* events that differ from Pentium M */
        break;

#ifdef PERFCTR_X86_INTEL_PENTM
    case PERFCTR_X86_INTEL_PENTM:
        _papi_load_preset_table( "Intel P6", 0, cidx );          /* base events */
        _papi_load_preset_table( "Intel PentiumM", 0, cidx );    /* events that differ from PIII */
        break;
#endif

#ifdef PERFCTR_X86_INTEL_CORE
    case PERFCTR_X86_INTEL_CORE:
        _papi_load_preset_table( "Intel Core Duo/Solo", 0, cidx );
        break;
#endif

#ifdef PERFCTR_X86_INTEL_CORE2
    case PERFCTR_X86_INTEL_CORE2:
        _papi_load_preset_table( "Intel Core2", 0, cidx );
        break;
#endif

    case PERFCTR_X86_AMD_K7:
        _papi_load_preset_table( "AMD64 (K7)", 0, cidx );
        break;

#ifdef PERFCTR_X86_AMD_K8    /* this is defined in perfctr 2.5.x */
    case PERFCTR_X86_AMD_K8:
        _papi_load_preset_table( "AMD64", 0, cidx );
        _papi_hwd_fixup_fp( "AMD64", cidx );
        break;
#endif

#ifdef PERFCTR_X86_AMD_K8C   /* this is defined in perfctr 2.6.x */
    case PERFCTR_X86_AMD_K8C:
        _papi_load_preset_table( "AMD64", 0, cidx );
        _papi_hwd_fixup_fp( "AMD64", cidx );
        break;
#endif

    /* NOTE(review): _x86_init_control_state tests PERFCTR_X86_AMD_FAM10H
       while this case tests PERFCTR_X86_AMD_FAM10 -- confirm the perfctr
       headers in use define both names. */
#ifdef PERFCTR_X86_AMD_FAM10 /* this is defined in perfctr 2.6.29 */
    case PERFCTR_X86_AMD_FAM10:
        _papi_load_preset_table( "AMD64 (Barcelona)", 0, cidx );
        break;
#endif

#ifdef PERFCTR_X86_INTEL_ATOM   /* family 6 model 28 */
    case PERFCTR_X86_INTEL_ATOM:
        _papi_load_preset_table( "Intel Atom", 0, cidx );
        break;
#endif

#ifdef PERFCTR_X86_INTEL_NHLM   /* family 6 model 26 */
    case PERFCTR_X86_INTEL_NHLM:
        _papi_load_preset_table( "Intel Nehalem", 0, cidx );
        break;
#endif

#ifdef PERFCTR_X86_INTEL_WSTMR
    case PERFCTR_X86_INTEL_WSTMR:
        _papi_load_preset_table( "Intel Westmere", 0, cidx );
        break;
#endif

    default:
        PAPIERROR( MODEL_ERROR );
        return PAPI_ENOIMPL;
    }

    SUBDBG( "Number of native events: %d\n",
            _perfctr_vector.cmp_info.num_native_events );

    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void swap_events ( EventSetInfo_t *  ESI,
struct hwd_pmc_control *  contr,
int  cntr1,
int  cntr2 
) [static]

Definition at line 952 of file perfctr-x86.c.

{
    /* Exchange control entries cntr1 and cntr2 and update every position
       recorded in the event set so that it follows the move. */
    struct perfctr_cpu_control *cpu = &contr->cpu_control;
    unsigned int saved_word;
    int saved_reset;
    int n, p;

    /* native events: positions cntr1 and cntr2 trade places */
    for ( n = 0; n < ESI->NativeCount; n++ ) {
        if ( ESI->NativeInfoArray[n].ni_position == cntr1 ) {
            ESI->NativeInfoArray[n].ni_position = cntr2;
            continue;
        }
        if ( ESI->NativeInfoArray[n].ni_position == cntr2 )
            ESI->NativeInfoArray[n].ni_position = cntr1;
    }

    /* user events: each pos[] list is walked until its first negative
       entry */
    for ( n = 0; n < ESI->NumberOfEvents; n++ ) {
        for ( p = 0; ESI->EventInfoArray[n].pos[p] >= 0; p++ ) {
            if ( ESI->EventInfoArray[n].pos[p] == cntr1 ) {
                ESI->EventInfoArray[n].pos[p] = cntr2;
                continue;
            }
            if ( ESI->EventInfoArray[n].pos[p] == cntr2 )
                ESI->EventInfoArray[n].pos[p] = cntr1;
        }
    }

    /* counter map */
    saved_word = cpu->pmc_map[cntr1];
    cpu->pmc_map[cntr1] = cpu->pmc_map[cntr2];
    cpu->pmc_map[cntr2] = saved_word;

    /* event select word */
    saved_word = cpu->evntsel[cntr1];
    cpu->evntsel[cntr1] = cpu->evntsel[cntr2];
    cpu->evntsel[cntr2] = saved_word;

    /* evntsel_aux is swapped on Pentium 4 only */
    if ( is_pentium4() ) {
        saved_word = cpu->evntsel_aux[cntr1];
        cpu->evntsel_aux[cntr1] = cpu->evntsel_aux[cntr2];
        cpu->evntsel_aux[cntr2] = saved_word;
    }

    /* overflow reset value */
    saved_reset = cpu->ireset[cntr1];
    cpu->ireset[cntr1] = cpu->ireset[cntr2];
    cpu->ireset[cntr2] = saved_reset;
}

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

Definition at line 56 of file papi_internal.c.

Definition at line 1353 of file perfctr-x86.c.

pentium4_replay_regs_t p4_replay_regs[] [static]

Definition at line 1100 of file perfctr-x86.c.

pentium4_cccr_reg_t pentium4_cccrs[]
pentium4_escr_reg_t pentium4_escrs[]
pentium4_event_t pentium4_events[]
int pfm2intel[] [static]
Initial value:
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 2, 3, 6, 7, 10, 11, 14, 15, 17 }

Definition at line 1148 of file perfctr-x86.c.

 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines