/* $NetBSD: cache_sh4.c,v 1.24 2013/05/14 14:11:43 tsutsui Exp $ */
/*-
* Copyright (c) 2002 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by UCHIYAMA Yasushi.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cache_sh4.c,v 1.24 2013/05/14 14:11:43 tsutsui Exp $");
#include "opt_cache.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sh3/cache.h>
#include <sh3/cache_sh4.h>
#include <sh3/vmparam.h>
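/* SH-4 cache lines are 32 bytes; align addresses to line boundaries. */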
#define round_line(x) (((x) + 31) & ~31)
#define trunc_line(x) ((x) & ~31)
void sh4_icache_sync_all(void);
void sh4_icache_sync_range(vaddr_t, vsize_t);
void sh4_icache_sync_range_index(vaddr_t, vsize_t);
void sh4_dcache_wbinv_all(void);
void sh4_dcache_wbinv_range(vaddr_t, vsize_t);
void sh4_dcache_wbinv_range_index(vaddr_t, vsize_t);
void sh4_dcache_inv_range(vaddr_t, vsize_t);
void sh4_dcache_wb_range(vaddr_t, vsize_t);
/* EMODE */
void sh4_emode_icache_sync_all(void);
void sh4_emode_icache_sync_range_index(vaddr_t, vsize_t);
void sh4_emode_dcache_wbinv_all(void);
void sh4_emode_dcache_wbinv_range_index(vaddr_t, vsize_t);
/* Must be inlined because we "call" them while running on P2 */
static inline void cache_sh4_op_line_32(vaddr_t, vaddr_t, uint32_t,
uint32_t) __attribute__((always_inline));
static inline void cache_sh4_op_8lines_32(vaddr_t, vaddr_t, uint32_t,
uint32_t) __attribute__((always_inline));
static inline void cache_sh4_emode_op_line_32(vaddr_t, vaddr_t,
uint32_t, uint32_t, uint32_t) __attribute__((always_inline));
static inline void cache_sh4_emode_op_8lines_32(vaddr_t, vaddr_t,
uint32_t, uint32_t, uint32_t) __attribute__((always_inline));
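/*
 * sh4_cache_config:
 *
 * Determine the cache geometry for this CPU, program CCR (from P2, with
 * both caches invalidated first), and install the matching cache
 * operation vector in sh_cache_ops.
 */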
void
sh4_cache_config(void)
{
int icache_size;
int dcache_size;
int ways;
uint32_t r;
/* Determine cache size */
switch (cpu_product) {
default:
/* FALLTHROUGH */
case CPU_PRODUCT_7750:
case CPU_PRODUCT_7750S:
case CPU_PRODUCT_7751:
#if defined(SH4_CACHE_DISABLE_EMODE)
case CPU_PRODUCT_7750R:
case CPU_PRODUCT_7751R:
#endif
icache_size = SH4_ICACHE_SIZE;
dcache_size = SH4_DCACHE_SIZE;
ways = 1;
r = SH4_CCR_ICE|SH4_CCR_OCE|SH4_CCR_WT;
break;
#if !defined(SH4_CACHE_DISABLE_EMODE)
case CPU_PRODUCT_7750R:
case CPU_PRODUCT_7751R:
icache_size = SH4_EMODE_ICACHE_SIZE;
dcache_size = SH4_EMODE_DCACHE_SIZE;
ways = 2;
r = SH4_CCR_EMODE|SH4_CCR_ICE|SH4_CCR_OCE|SH4_CCR_WT;
break;
#endif
}
#if defined(SH4_CACHE_DISABLE_ICACHE)
r &= ~SH4_CCR_ICE;
#endif
#if defined(SH4_CACHE_DISABLE_DCACHE)
r &= ~SH4_CCR_OCE;
#endif
#if defined(SH4_CACHE_WB_U0_P0_P3)
r &= ~SH4_CCR_WT;
#endif
#if defined(SH4_CACHE_WB_P1)
r |= SH4_CCR_CB;
#endif
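/*
 * CCR may only be modified by code running in the uncached P2 area, so
 * jump to P2, flush both caches, write CCR, and return to P1.
 */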
RUN_P2;
if (r & SH4_CCR_EMODE)
SH4_EMODE_CACHE_FLUSH();
else
SH4_CACHE_FLUSH();
_reg_write_4(SH4_CCR, SH4_CCR_ICI|SH4_CCR_OCI);
_reg_write_4(SH4_CCR, r);
RUN_P1;
r = _reg_read_4(SH4_CCR);
sh_cache_unified = 0;
sh_cache_enable_icache = (r & SH4_CCR_ICE);
sh_cache_enable_dcache = (r & SH4_CCR_OCE);
sh_cache_ways = ways;
sh_cache_line_size = SH4_CACHE_LINESZ;
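/*
 * The operand cache is virtually indexed; index bits above the page
 * offset (one way's worth) can produce virtual aliases, so record them
 * for the pmap and for cache-color page selection.
 */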
sh_cache_alias_mask = (dcache_size / ways - 1) & ~PAGE_MASK;
sh_cache_prefer_mask = (dcache_size / ways - 1);
sh_cache_write_through_p0_u0_p3 = (r & SH4_CCR_WT);
sh_cache_write_through_p1 = !(r & SH4_CCR_CB);
sh_cache_write_through = sh_cache_write_through_p0_u0_p3 &&
sh_cache_write_through_p1;
sh_cache_ram_mode = (r & SH4_CCR_ORA);
sh_cache_index_mode_icache = (r & SH4_CCR_IIX);
sh_cache_index_mode_dcache = (r & SH4_CCR_OIX);
sh_cache_size_dcache = dcache_size;
if (sh_cache_ram_mode)
sh_cache_size_dcache /= 2;
sh_cache_size_icache = icache_size;
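/* Default (non-EMODE) operation vector; adjusted per CPU below. */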
sh_cache_ops._icache_sync_all = sh4_icache_sync_all;
sh_cache_ops._icache_sync_range = sh4_icache_sync_range;
sh_cache_ops._icache_sync_range_index = sh4_icache_sync_range_index;
sh_cache_ops._dcache_wbinv_all = sh4_dcache_wbinv_all;
sh_cache_ops._dcache_wbinv_range = sh4_dcache_wbinv_range;
sh_cache_ops._dcache_wbinv_range_index = sh4_dcache_wbinv_range_index;
sh_cache_ops._dcache_inv_range = sh4_dcache_inv_range;
sh_cache_ops._dcache_wb_range = sh4_dcache_wb_range;
switch (cpu_product) {
case CPU_PRODUCT_7750: /* FALLTHROUGH */
case CPU_PRODUCT_7750S:
/* the memory-mapped d$ address array can only be accessed from P2 */
sh_cache_ops._dcache_wbinv_all
= (void *)SH3_P1SEG_TO_P2SEG(sh4_dcache_wbinv_all);
sh_cache_ops._dcache_wbinv_range_index
= (void *)SH3_P1SEG_TO_P2SEG(sh4_dcache_wbinv_range_index);
break;
#if !defined(SH4_CACHE_DISABLE_EMODE)
case CPU_PRODUCT_7750R:
case CPU_PRODUCT_7751R:
if (!(r & SH4_CCR_EMODE)) {
break;
}
sh_cache_ops._icache_sync_all = sh4_emode_icache_sync_all;
sh_cache_ops._icache_sync_range_index =
    sh4_emode_icache_sync_range_index;
sh_cache_ops._dcache_wbinv_all = sh4_emode_dcache_wbinv_all;
sh_cache_ops._dcache_wbinv_range_index =
    sh4_emode_dcache_wbinv_range_index;
break;
#endif
}
}
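/*
 * MI code normally reaches these operations through the sh_cache_ops
 * wrappers declared in <sh3/cache.h>.  A sketch (wrapper names assumed):
 *
 *	sh_icache_sync_range(va, sz);	after copying in instructions
 *	sh_dcache_wbinv_range(va, sz);	before handing a buffer to DMA
 */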
/*
* cache_sh4_op_line_32: (index-operation)
*
* Clear the specified bits on a single 32-byte cache line.
*/
static inline void
cache_sh4_op_line_32(vaddr_t va, vaddr_t base, uint32_t mask, uint32_t bits)
{
vaddr_t cca;
cca = base | (va & mask);
_reg_bclr_4(cca, bits);
}
/*
* cache_sh4_op_8lines_32: (index-operation)
*
* Clear the specified bits on 8 32-byte cache lines.
*/
static inline void
cache_sh4_op_8lines_32(vaddr_t va, vaddr_t base, uint32_t mask, uint32_t bits)
{
volatile uint32_t *cca = (volatile uint32_t *)
(base | (va & mask));
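/* address array entries for consecutive lines are 32 bytes (8 words) apart */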
cca[ 0] &= ~bits;
cca[ 8] &= ~bits;
cca[16] &= ~bits;
cca[24] &= ~bits;
cca[32] &= ~bits;
cca[40] &= ~bits;
cca[48] &= ~bits;
cca[56] &= ~bits;
}
void
sh4_icache_sync_all(void)
{
vaddr_t va = 0;
vaddr_t eva = SH4_ICACHE_SIZE;
/* d$ index ops must be called via P2 on 7750 and 7750S */
(*sh_cache_ops._dcache_wbinv_all)();
RUN_P2;
while (va < eva) {
cache_sh4_op_8lines_32(va, SH4_CCIA, CCIA_ENTRY_MASK, CCIA_V);
va += 32 * 8;
}
/* assume we are returning into a P1 caller */
PAD_P1_SWITCH;
}
void
sh4_icache_sync_range(vaddr_t va, vsize_t sz)
{
vaddr_t ccia;
vaddr_t eva = round_line(va + sz);
va = trunc_line(va);
sh4_dcache_wbinv_range(va, (eva - va));
RUN_P2;
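/*
 * Associative write to the i$ address array: with the A bit set, the
 * write compares the supplied tag and clears V only on a match.
 */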
while (va < eva) {
/* CCR.IIX has no effect on this entry specification */
ccia = SH4_CCIA | CCIA_A | (va & CCIA_ENTRY_MASK);
_reg_write_4(ccia, va & CCIA_TAGADDR_MASK); /* V = 0 */
va += 32;
}
/* assume we are returning into a P1 caller */
PAD_P1_SWITCH;
}
void
sh4_icache_sync_range_index(vaddr_t va, vsize_t sz)
{
vaddr_t eva = round_line(va + sz);
va = trunc_line(va);
/* d$ index ops must be called via P2 on 7750 and 7750S */
(*sh_cache_ops._dcache_wbinv_range_index)(va, eva - va);
RUN_P2;
while ((eva - va) >= (8 * 32)) {
cache_sh4_op_8lines_32(va, SH4_CCIA, CCIA_ENTRY_MASK, CCIA_V);
va += 32 * 8;
}
while (va < eva) {
cache_sh4_op_line_32(va, SH4_CCIA, CCIA_ENTRY_MASK, CCIA_V);
va += 32;
}
/* assume we are returning into a P1 caller */
PAD_P1_SWITCH;
}
void
sh4_dcache_wbinv_all(void)
{
vaddr_t va = 0;
vaddr_t eva = SH4_DCACHE_SIZE;
/* RUN_P2; */ /* called via P2 address if necessary */
while (va < eva) {
cache_sh4_op_8lines_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
(CCDA_U | CCDA_V));
va += 32 * 8;
}
/* assume we are returning into a P1 caller */
PAD_P1_SWITCH;
}
void
sh4_dcache_wbinv_range(vaddr_t va, vsize_t sz)
{
vaddr_t eva = round_line(va + sz);
va = trunc_line(va);
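/* ocbp: write back (if dirty) and invalidate the 32-byte block at va */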
while (va < eva) {
__asm volatile("ocbp @%0" : : "r"(va));
va += 32;
}
}
void
sh4_dcache_wbinv_range_index(vaddr_t va, vsize_t sz)
{
vaddr_t eva = round_line(va + sz);
va = trunc_line(va);
/* RUN_P2; */ /* called via P2 address if necessary */
while ((eva - va) >= (8 * 32)) {
cache_sh4_op_8lines_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
(CCDA_U | CCDA_V));
va += 32 * 8;
}
while (va < eva) {
cache_sh4_op_line_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
(CCDA_U | CCDA_V));
va += 32;
}
/* assume we are returning into a P1 caller */
PAD_P1_SWITCH;
}
void
sh4_dcache_inv_range(vaddr_t va, vsize_t sz)
{
vaddr_t eva = round_line(va + sz);
va = trunc_line(va);
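/* ocbi: invalidate the 32-byte block at va without writing it back */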
while (va < eva) {
__asm volatile("ocbi @%0" : : "r"(va));
va += 32;
}
}
void
sh4_dcache_wb_range(vaddr_t va, vsize_t sz)
{
vaddr_t eva = round_line(va + sz);
va = trunc_line(va);
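/* ocbwb: write back the 32-byte block at va, leaving it valid */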
while (va < eva) {
__asm volatile("ocbwb @%0" : : "r"(va));
va += 32;
}
}
/*
* EMODE operation
*/
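/*
 * In EMODE the caches are 2-way set associative: 16KB i$ (8KB per way)
 * and 32KB d$ (16KB per way), so bit 13 selects the i$ way and bit 14
 * the d$ way in the memory-mapped address arrays.
 */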
/*
* cache_sh4_emode_op_line_32: (index-operation)
*
* Clear the specified bits on a single 32-byte cache line, in both ways.
*/
static inline void
cache_sh4_emode_op_line_32(vaddr_t va, vaddr_t base, uint32_t mask,
uint32_t bits, uint32_t way_shift)
{
vaddr_t cca;
/* extract entry # */
va &= mask;
/* operate for each way */
cca = base | (0 << way_shift) | va;
_reg_bclr_4(cca, bits);
cca = base | (1 << way_shift) | va;
_reg_bclr_4(cca, bits);
}
/*
* cache_sh4_emode_op_8lines_32: (index-operation)
*
* Clear the specified bits on 8 32-byte cache lines, in both ways.
*/
static inline void
cache_sh4_emode_op_8lines_32(vaddr_t va, vaddr_t base, uint32_t mask,
uint32_t bits, uint32_t way_shift)
{
volatile uint32_t *cca;
/* extract entry # */
va &= mask;
/* operate for each way */
cca = (volatile uint32_t *)(base | (0 << way_shift) | va);
cca[ 0] &= ~bits;
cca[ 8] &= ~bits;
cca[16] &= ~bits;
cca[24] &= ~bits;
cca[32] &= ~bits;
cca[40] &= ~bits;
cca[48] &= ~bits;
cca[56] &= ~bits;
cca = (volatile uint32_t *)(base | (1 << way_shift) | va);
cca[ 0] &= ~bits;
cca[ 8] &= ~bits;
cca[16] &= ~bits;
cca[24] &= ~bits;
cca[32] &= ~bits;
cca[40] &= ~bits;
cca[48] &= ~bits;
cca[56] &= ~bits;
}
void
sh4_emode_icache_sync_all(void)
{
vaddr_t va = 0;
vaddr_t eva = SH4_EMODE_ICACHE_SIZE;
sh4_emode_dcache_wbinv_all();
RUN_P2;
while (va < eva) {
cache_sh4_emode_op_8lines_32(va, SH4_CCIA, CCIA_ENTRY_MASK,
CCIA_V, 13);
va += 32 * 8;
}
/* assume we are returning into a P1 caller */
PAD_P1_SWITCH;
}
void
sh4_emode_icache_sync_range_index(vaddr_t va, vsize_t sz)
{
vaddr_t eva = round_line(va + sz);
va = trunc_line(va);
sh4_emode_dcache_wbinv_range_index(va, eva - va);
RUN_P2;
while ((eva - va) >= (8 * 32)) {
cache_sh4_emode_op_8lines_32(va, SH4_CCIA, CCIA_ENTRY_MASK,
CCIA_V, 13);
va += 32 * 8;
}
while (va < eva) {
cache_sh4_emode_op_line_32(va, SH4_CCIA, CCIA_ENTRY_MASK,
CCIA_V, 13);
va += 32;
}
/* assume we are returning into a P1 caller */
PAD_P1_SWITCH;
}
void
sh4_emode_dcache_wbinv_all(void)
{
vaddr_t va = 0;
vaddr_t eva = SH4_EMODE_DCACHE_SIZE;
while (va < eva) {
cache_sh4_emode_op_8lines_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
(CCDA_U | CCDA_V), 14);
va += 32 * 8;
}
}
void
sh4_emode_dcache_wbinv_range_index(vaddr_t va, vsize_t sz)
{
vaddr_t eva = round_line(va + sz);
va = trunc_line(va);
while ((eva - va) >= (8 * 32)) {
cache_sh4_emode_op_8lines_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
(CCDA_U | CCDA_V), 14);
va += 32 * 8;
}
while (va < eva) {
cache_sh4_emode_op_line_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
(CCDA_U | CCDA_V), 14);
va += 32;
}
}