#Copyright (C)1991-2003 Altera Corporation
#Any megafunction design, and related net list (encrypted or decrypted),
#support information, device programming or simulation file, and any other
#associated documentation or information provided by Altera or a partner
#under Altera's Megafunction Partnership Program may be used only to
#program PLD devices (but not masked PLD devices) from Altera.  Any other
#use of such megafunction design, net list, support information, device
#programming or simulation file, or any other related documentation or
#information is prohibited for any other purpose, including, but not
#limited to modification, reverse engineering, de-compiling, or use with
#any other silicon devices, unless such use is explicitly licensed under
#a separate agreement with Altera or a megafunction partner.  Title to
#the intellectual property, including patents, copyrights, trademarks,
#trade secrets, or maskworks, embodied in any such megafunction design,
#net list, support information, device programming or simulation file, or
#any other related documentation or information provided by Altera or a
#megafunction partner, remains with Altera, the megafunction partner, or
#their respective licensors.  No other licenses, including any licenses
#needed under any third party's intellectual property, are provided herein.
#Copying or modifying any file, or portion thereof, to which this notice
#is attached violates this copyright.

use e_bdpram;
use europa_all;
use strict;

################################################################
# Nios Data Cache
#
# This implementation of the Nios data cache is inserted between
# the Nios (16 or 32 bit) data master and the Avalon bus.  It is
# a simple direct-mapped architecture with lines whose width matches the
# size of Nios (16 or 32 bits).  Each line contains a single datum.
#
# The dc_* ports connect to the Nios core.  The d_* ports connect to the
# Avalon bus.  When the cache is disabled, corresponding port pairs are
# connected combinatorially, and memory accesses act as if the cache
# does not exist.  This implementation does not snoop data-master
# writes, and so cache coherency must be managed by the application.
#
# The current Nios SDRAM controller does not support latency.  
# And that's fine, because the Nios data master also does not support latency.
#
# Note: This particular cache was modified from KVEENSTRA's original by
# TWAYNE.  Modifications included addition of a set_invalidate feature,
# and a rewrite of the control FSM's using e_case.  Thu Sep 19 15:38:38  2002
#
# Further changes include parameterizing the dcache_data_width (to be 16
# or 32, as Nios demands), and using a c_suppress input from the CPU to
# suppress a Read Hit (force a miss/load)
################################################################

################################################################
# Data cache memory is now e_bdpram
################################################################

# FSM setup for the dcache control state machine.
#
# $fsm_codes is the number of FSM states.  @CC_BITS lists the bit index of
# each state (0 .. $fsm_codes-1) and is used below to test a single state
# bit, e.g. "cc[$CC_BITS[0]]".  @CC holds the matching state codes that are
# loaded into the state register.
my $fsm_codes = 4;

my @CC_BITS = (0 .. $fsm_codes - 1);

### NB: one_hot_encoding now lives in europa_utils.pm!
my @CC = one_hot_encoding($fsm_codes);

# DCache Control/FSM submodule.
#
# Builds the "<name>_dcache_control" e_module, registers it with $project,
# and returns it.  The module holds the cache-control state register (cc),
# the next-state logic (cc_next), and the assignments that derive
# use_cache_data, dc_waitrequest, write_to_cache, d_read and d_write.
#
# Parameters:
#   $Opt     - hash ref of options; only $Opt->{name} is read here, as the
#              prefix of the generated module name.
#   $project - project object; the new module is passed to its add_module().
#
# Returns: the newly created e_module object.
sub make_data_cache_control
{
  my ($Opt, $project) = (@_);

  my $module = e_module->new ({name => $Opt->{name}."_dcache_control"});
  $project->add_module ($module);      

  # NOTE(review): none of the add()/adds() calls below name a target module,
  # so this marker presumably makes $module the default destination for them
  # while $marker is in scope -- confirm against the europa library.
  my $marker = e_default_module_marker->new ($module);

  # NOTE(review): dc_write and d_write are referenced by the assignments
  # further down but are not listed here; presumably europa infers those
  # ports from signal usage -- confirm.
  e_port->adds (
    [dc_read            => 1, "in" ],
    [dc_waitrequest     => 1, "out"],

    [d_read             => 1, "out"],
    [d_waitrequest      => 1, "in" ],
    
    [enable_cache       => 1, "in" ],
    [hit                => 1, "in" ],
    [use_cache_data     => 1, "out"],  # LED2 pin T19
    [write_to_cache     => 1, "out"],
    
  # [busy               => 1, "out"],  # feed up to invalidate-suppress logic
  );

  # NB: twayne, Mon Jan 27 10:32:21  2003
  # i_am_verilog is used in the FSM case statement to optimize away the
  # default case for synthesis ONLY for verilog -- vhdl synth requires it!
  # my $lang = $project->system_ptf()->{WIZARD_SCRIPT_ARGUMENTS}{hdl_language};
  # my $i_am_verilog;
  # if   ($lang =~ /verilog/i) { $i_am_verilog = 1; }
  # else { $i_am_verilog = 0; }
  # NB: This was all commented out because it is forced in e_case as of 1/24/03

  # The main FSM:
  # state CC[0] is IDLE -- wait for read or write
  # state CC[1] is WAIT -- (for pending write due to a read miss, or a write)
  # state CC[2] is HIT? -- wait exactly one cycle for 'hit' logic to ripple
  # state CC[3] is EXIT -- wait exactly one cycle for write to complete.
  #
  # Basic FSM movement should look like this:
  # Case Read Hit : CC[0] -> CC[2] -> CC[0]
  # Case Read Miss: CC[0] -> CC[2] -> CC[1] (1+ times) -> CC[3] -> CC[0]
  # Case Write    : CC[0] -> CC[1] (1+ times) -> CC[3] -> CC[0]
  #
  # The HIT? state was added to cut the tag_compare -> hit long timing path
  # The EXIT state was added to allow the altsyncram address_a ports to
  # switch from 'write' address back to 'read' address for the next cycle.
  # Note that Read Hits are the optimal case (2 cycles).  The minimum
  # Write cycle takes 3 states
  
  # state-machine registers
  # (width 4 equals the file-level $fsm_codes: one bit per FSM state)
  e_signal->adds ([cc      => 4],
                  [cc_next => 4],);

  # cc takes the value of cc_next; its async value is the IDLE code, CC[0].
  e_register->adds 
      ({out => "cc", in => "cc_next", enable => undef, async_value => $CC[0]},
      );

  # Next-state logic.  Each branch tests one bit of cc and assigns cc_next.
  # NOTE(review): clock => "" presumably marks this process as unclocked
  # (combinational) -- confirm against e_process.
  # Switched to e_if to eliminate SPR 118394, Quartus message...
  e_process->add({
    clock   => "",
    contents=> [
      e_if->new ({
        comment   => " IDLE",
        condition => "cc[$CC_BITS[0]]",
        then => 
            [
             e_if->new({
                 comment  => " Do nothing if cache disabled",
                 condition=> "enable_cache",
                 then     =>
                     [ # Reads and writes never happen simultaneously
                       e_if->new({
                           comment  => " READ: Go to wait on a Miss;".
                               "else data comes back from the RAM",
                               condition=> "dc_read",
                               then     => ["cc_next" => $CC[2]],
                               elsif    => ({
                                   comment  => " WRITE: Wait for write-thru.",
                                   condition=> "dc_write",
                                   then     => ["cc_next" => $CC[1]],
                                   else     => ["cc_next" => $CC[0]],
                               }),
                           }),
                       ],
                 else     => ["cc_next" => $CC[0]],
             }),
             ],
        elsif => {
          comment   => " WAIT",
          condition => "cc[$CC_BITS[1]]",
          then =>
              [
               e_if->new({
                   comment  => " Wait for Rd Data",
                   condition=> "!d_waitrequest",
                   then     => ["cc_next" => $CC[3]],
                   else     => ["cc_next" => $CC[1]],
               }),
               ],
          elsif => {
              comment   => " HIT?",
              condition => "cc[$CC_BITS[2]]",
              then =>
                  [
                   e_if->new({
                       comment  => " If Hit => done, else Wait for Rd Data.",
                       condition=> "hit",
                       then     => ["cc_next" => $CC[0]],
                       else     => ["cc_next" => $CC[1]],
                   }),
                   ],
              # No state bit 0..2 set: this covers EXIT (CC[3]), which
              # always returns to IDLE.
              else => [cc_next => $CC[0]],
          },
        },
    }),  # end of e_if
    ],  # end of contents
  });

  # we only use_cache_data during read hits
  e_assign->add({
      lhs => "use_cache_data",
      rhs => "enable_cache & dc_read & hit",
  });
  # if our next state is not IDLE, we better be stalling the CPU!  We
  # use 'cc_next' so that we get the immediate rd->wait turnaround
  # required by avalon.  With the cache disabled, d_waitrequest is passed
  # straight through.
  e_assign->add({
      lhs => "dc_waitrequest",
      rhs => "enable_cache ? (!cc_next[$CC_BITS[0]]) : d_waitrequest",
  });
  # latent masters use waitrequest as data acknowledgement.  Use the FSM
  # state to know exactly when we are going to the 'write recovery (FSM
  # EXIT)' state, and so when to pulse write_to_cache for exactly one cycle.
  # It's not valid to compare dc_write to d_waitrequest depending on some
  # CPU modes.
  e_assign->add({
      lhs => "write_to_cache",
      rhs => "cc_next[$CC_BITS[3]]",
  }); # "(!d_waitrequest & (dc_write | d_read))",
  # only assert d_read when we are waiting for readback data (WAIT state);
  # with the cache disabled, dc_read is passed straight through.
  e_assign->add({
      lhs => "d_read",
      rhs => "enable_cache ? cc[$CC_BITS[1]] & dc_read : dc_read",
  });
  # do not continue to assert d_write during FSM EXIT state.  dc_write
  # and dc_waitrequest are each one cycle longer than d_write and d_waitrequest
  e_assign->add({
      lhs => "d_write",
      rhs => "dc_write & (!cc[$CC_BITS[3]])",
  });

  return $module;
}

# Top-level data cache generator.
#
# Builds the "<name>_dcache" e_module, registers it with $project, and
# returns it.  The module instantiates the control submodule produced by
# make_data_cache_control, declares the CPU-side (dc_*), Avalon-side (d_*)
# and cache-control (c_*) ports, builds the datapath and hit-detection
# logic, and adds an e_bdpram instance as the cache memory.
#
# Parameters:
#   $Opt     - hash ref of options.  Keys read here:
#                name                 - prefix for generated module names
#                cache_dcache_size_k  - cache size in kilo-units
#                CONSTANTS/CONSTANT/nasys_dcache_line_size/value
#                                     - line size used to derive the number
#                                       of cache lines
#                d_Address_Width      - width of the data-master address
#   $project - project object; the new module is passed to its add_module().
#
# Returns: the newly created e_module object.
#
# Dies if the computed tag width is less than 2 bits, i.e. the cache is too
# large relative to the address space.
sub make_data_cache
{
    my ($Opt, $project) = (@_);

    # Build the control submodule first; it is instantiated inside this
    # module below.
    my @submodules =(&make_data_cache_control($Opt, $project),
                     );
   
    my $module = e_module->new ({name => $Opt->{name}."_dcache"});
    $project->add_module ($module);      

    # NOTE(review): presumably makes $module the default target of the
    # add()/adds() calls below -- confirm against the europa library.
    my $marker = e_default_module_marker->new ($module);

    foreach my $submod (@submodules) 
        { e_instance->add({module => $submod->name()}); }


    ############
    # PARAMETERS
    ############

    # calculate set width from WSA ($Opt...):
    # k_size is [1,2,4,8], and needs to be converted to kilo...
    # b_size is [16(NY),32(NJ)] how many bytes per line
    # size is (k*1024)/b
    # set_width is log2(size);
    # NOTE(review): a zero or missing line-size value makes the division
    # below die with Perl's "Illegal division by zero"; no explicit check is
    # made here.
    my $k_size = $Opt->{cache_dcache_size_k};
    my $b_size = $Opt->{CONSTANTS}{CONSTANT}{nasys_dcache_line_size}{value};
    my $size = ($k_size * 1024)/$b_size;
    my $dcache_set_width = log2($size);

    # Make data a parameterizable width, 16 or 32, based upon how wide
    # our nios is...
    my $dcache_data_width = 32; # default to 32 for now, but use var...

    # The data address is split into a set field and a tag field.
    # Bit 0 of the data address is ignored for data width 16
    # Bit [1:0] of data address is ignored for data width 32
    # The set field is the $dcache_set_width bits above bit 0 (or [1:0]).
    # The tag field is the MSBs above that.
    #
    # |<---- $dcache_tag_width ---->|<----- $dcache_set_width ---->|
    # |_____________________________|______________________________|_
    # |                             |                              | |
    # |             tag             |              set             | |
    # |_____________________________|______________________________|_|
    #  d_Address_Width - 1   ...     dcache_set_width     ...     2 10
    #
    # Calculate the bit positions of the set and tag fields within dc_address.
    # (data_width == 16) ==> 1; (data_width == 32) ==> 2
    my $set_lsb = $dcache_data_width >> 4;
    my $address_width = $Opt->{d_Address_Width};
    my $dcache_tag_width = $address_width - ($dcache_set_width + $set_lsb);
    my $set_msb = $set_lsb + $dcache_set_width - 1;
    my $tag_lsb = $set_msb + 1;
    my $tag_msb = $tag_lsb + $dcache_tag_width - 1;

    # Refuse to generate a cache whose tag would be narrower than 2 bits.
    if ($dcache_tag_width < 2)
    {
        die ("\nMaximum DCache size must be 25% or less".
             " of Total Memory Map Size!\n".
             "  Current Memory size is ".((2 ** $address_width)/1024).
             " kbytes\n".
             "  Current DCache size is ".$k_size." kbytes\n".
             "Please adjust cache size and regenerate.\n");
    }

    # The cache line consists of three fields: data, tag, and valid.
    # The data field is always 16 or 32 bits; the valid bit is always one bit.
    #
    # | |<---- $dcache_tag_width ---->|<--------- 16 or 32 --------->|
    # |_|_____________________________|______________________________|
    # | |                             |                              |
    # |v|             tag             |             data             |
    # |_|_____________________________|______________________________|
    #
    my $dcache_line_length = 1 + $dcache_tag_width + $dcache_data_width;

    # Byte enable lines are required for this master:
    my $dcache_byteena_width = $dcache_data_width >> 3; # divide by 8.

    # print "\n".$Opt->{name}." Data Cache: ".
    #     "Addr Bits = ".$Opt->{d_Address_Width}."; ".
    #     "Data Bits = $dcache_data_width; ".
    #     "Line Bits = $dcache_line_length\n";
    # print $Opt->{name}." Data Cache: ".
    #     "Tag Bits = $dcache_tag_width (Addr[$tag_msb:$tag_lsb]); ".
    #     "Set Bits = $dcache_set_width (Addr[$set_msb:$set_lsb])\n";

    ###########
    # PORTS:
    # dc_* ports are cpu-side master ports
    # d_* ports are avalon side master ports
    # c_* ports are CPU write controls which affect the cache, but not avalon.
    ###########

    e_port->adds (
          [dc_read          => 1,                       "in" ],
          [dc_read_pre      => 1,                       "in" ],
          [dc_address_pre   => $address_width,          "in" ],
          [dc_address       => $address_width,          "in" ],
          [c_suppress       => 1,                       "in" ],
          [c_enable_cache   => 1,                       "in" ],
          [c_invalidate     => 1,                       "in" ],
          [c_invalid_set    => $dcache_set_width,       "in" ],
          [dc_waitrequest   => 1,                       "out"],
          [dc_readdata      => $dcache_data_width,      "out"],

          [dc_write         => 1,                       "in" ],
          [dc_writedata     => $dcache_data_width,      "in" ],
          [dc_byteenable    => $dcache_byteena_width,   "in" ],

          [d_write          => 1,                       "out"],
          [d_writedata      => $dcache_data_width,      "out"],
          [d_byteenable     => $dcache_byteena_width,   "out"],

          [d_read           => 1,                       "out"],
          [d_address        => $address_width,          "out"],
          [d_waitrequest    => 1,                       "in" ],
          [d_readdata       => $dcache_data_width,      "in" ],

          # [enable_cache     => 1,                       "out"],  # T18
          # [use_cache_data   => 1,                       "out"],  # T19
    );

    ###########
    # DATAPATH
    ###########

    # Define the output fields of the cache memory.
    e_signal->adds ([cache_valid => 1 ],
                    [cache_tag   => $dcache_tag_width ],
                    [cache_data  => $dcache_data_width],
                    [writedata   => $dcache_data_width],
                    [readdata    => $dcache_data_width],
                    [d_readdata_d=> $dcache_data_width],
                    [writevalid  => 1 ],
                    [dc_clk_en   => 1 ],
                    );

    # Extract fields from dc_address and dc_address_pre
    e_signal->adds ([dc_set      => $dcache_set_width],
                    [dc_set_pre  => $dcache_set_width],
                    [dc_tag      => $dcache_tag_width],
                    );

    e_assign->adds (["dc_set",     "dc_address    \[$set_msb:$set_lsb\]"],
                    ["dc_set_pre", "dc_address_pre\[$set_msb:$set_lsb\]"],
                    ["dc_tag",     "dc_address    \[$tag_msb:$tag_lsb\]"],
                    );

    # registered copy of c_enable_cache to improve timing.
    e_register->add
        ({out => "enable_cache", in => "c_enable_cache", enable => undef});

    # delay d_readdata to compensate for cache write recovery (FSM EXIT state)
    e_register->adds
      ({out => "d_readdata_d", in => "d_readdata",
        enable => undef, async_value => $dcache_data_width."'b0"},
       );

    # choose d_readdata vs. d_readdata_d based on enable_cache
    e_assign->add
        (["readdata" => "enable_cache ? d_readdata_d : d_readdata"]);

    # The dc_readdata mux chooses between cache data and slave data.
    e_assign->add 
        (["dc_readdata" => "use_cache_data ? cache_data : readdata"]);

    # Mux outgoing write data to altsyncram if we're writing,
    # else default to read data as input to altsyncram.
    e_assign->add 
        (["writedata" => "dc_write ? dc_writedata : d_readdata"]);

    # Create writevalid bit to invalidate line if this is a byte write.
    # A Byte Read or Write will potentially have junk data on unused
    # bytes, so we invalidate the corresponding cache line so the next
    # read forces the line to be refreshed.
    # (writevalid is 1 only when every dc_byteenable bit is set, the cache is
    # enabled, and the access is not suppressed.)
    e_assign->add
        (["writevalid" =>
          "enable_cache && ".
          "( ((dc_write | dc_read) && &dc_byteenable && !c_suppress_d) )"]);

    # create a qualified version of dc_read_pre to use as part of
    # altsyncram clockenable: 
    e_assign->add
        (["dc_clk_en" => "dc_read_pre & !dc_waitrequest"]);

    # For performance reasons, the cache assumes that one of the inputs
    # of the mux driving the dc_address register is selected.  Delay the
    # set field of d_address to allow checking of this assumption.
    # The enable of this register must match the clock enable of the cache mem.
    e_signal->add ([set => $dcache_set_width]);
    e_register->adds 
        ( # set is our own internal register'd copy of dc_set
          {out    => "set",
           in     => "dc_set_pre",
           enable => "dc_clk_en", },  # was dc_address_clken
          # delay c_suppress to match set, dc_set (reg'd versions of dc_addr)
          {out    => "c_suppress_d",
           in     => "c_suppress",
           enable => "!dc_waitrequest", }, # suppression is for 1 read OR write
          );

    # Register all sub-elements of 'hit' (FSM waits extra cycle for this) so
    # we can defeat a long timing path with all the compares...
    e_register->adds
      ({out => "set_match", in => "(set == dc_set)",
        enable => undef, async_value => "1'b0"},
       {out => "tag_match", in => "(cache_tag == dc_tag)",
        enable => undef, async_value => "1'b0"},
       {out => "cache_valid_d", in => "cache_valid",
        enable => undef, async_value => "1'b0"},
       {out => "c_suppress_d_d", in => "c_suppress_d",
        enable => undef, async_value => "1'b0"},
      );

    # Recognize a hit -- suppress hit if so directed by the CPU!
    e_assign->add(["hit" =>
                   "set_match & tag_match & cache_valid_d & !c_suppress_d_d"]);

    # These signals pass through the cache unchanged.
    e_assign->adds (["d_address", "dc_address"],
                    ["d_writedata", "dc_writedata"],
                    ["d_byteenable", "dc_byteenable"],
                    );

    ############
    # CACHE RAM
    ############

    # Read-data buses of the two RAM ports, kept internal to this module.
    # NOTE(review): q_b is declared here but is not referenced in %port_map
    # below -- confirm whether it is still needed.
    e_signal->adds(
        {
            name => 'q_a',
            width => $dcache_line_length,
            never_export => 1,
        },
        {
            name => 'q_b',
            width => $dcache_line_length,
            never_export => 1,
        },
    );

    # Split the port-A read data into the {valid, tag, data} line fields.
    e_assign->adds
        (
         ["cache_valid" => "q_a\[".($dcache_line_length - 1)."\]"],
         ["cache_tag"   => "q_a\[".
          (($dcache_tag_width + $dcache_data_width) - 1).
          ":$dcache_data_width\]"],
         ["cache_data"  => "q_a\[".($dcache_data_width - 1).":0\]"],
         );

    # A Port is used for both write and read cycles;
    # B Port is used only for line invalidates.
    # That's why enable_cache appears the way it does with both clocken's
    # It's also why clocken0 (for port A) has (write_to_cache | dc_clk_en):
    #      The first clocks in an address to write to; the second to read from
    #
    # NOTE(review): clocken1 is "!enable_cache", so port B (the invalidate
    # port) is clock-enabled only while the cache is disabled -- confirm this
    # is the intended invalidate protocol.
    #
    # '+' and '.' have equal precedence and associate left in Perl, so
    # $clear_string is (tag width + data width) followed by "{1'b0}"; data_b
    # is therefore an all-zero line (valid bit clear).
    my $clear_string = $dcache_tag_width + $dcache_data_width . "{1'b0}";
    my %port_map = (
        wren_a    => "write_to_cache",
        wren_b    => "c_invalidate",
        data_a    => "{writevalid, dc_tag, writedata}",
        data_b    => "{1'b0, {".$clear_string."}}",
        address_a => "write_to_cache ? dc_set : dc_set_pre",
        address_b => "c_invalid_set",
        clock0    => "clk",
        clock1    => "clk",
        clocken0  => "enable_cache & (write_to_cache | dc_clk_en)",
        clocken1  => "!enable_cache",
        # no byte enables for dcache.
        q_a       => "q_a",
    );

    # Cache memory: one full line ({valid, tag, data}) per set, both ports
    # using the same data and address widths.
    e_bdpram->add(
                  {module          => $Opt->{name}."_dcache_memory_module",
                   name            => $Opt->{name}."_dcache_memory",
                   port_map        => \%port_map,                        
                   a_data_width    => $dcache_line_length,
                   b_data_width    => $dcache_line_length,
                   a_address_width => $dcache_set_width,
                   b_address_width => $dcache_set_width,
                  }
                  );


    return $module;
}

# NOTE(review): this non-empty string appears to be the last statement in the
# file, so it presumably supplies the true value Perl expects when the file
# is loaded via require/use -- confirm before removing or replacing it.
qq{
Just detach from all sound and form,
And do not dwell in detachment,
And do not dwell in intellectual understanding,
This is practice. 
- Baizhang 
};
