View Javadoc

1   /**
2    * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
3    *
4    *   http://www.apache.org/licenses/LICENSE-2.0
5    *
6    * Unless required by applicable law or agreed to in writing, software
7    * distributed under the License is distributed on an "AS IS" BASIS,
8    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9    * See the License for the specific language governing permissions and
10   * limitations under the License.
11   */
12  package org.apache.omid.benchmarks.utils;
13  
14  
15  /**
16   * A generator of a zipfian distribution. It produces a sequence of items, such that some items are more popular than others, according
17   * to a zipfian distribution. When you construct an instance of this class, you specify the number of items in the set to draw from, either
18   * by specifying an itemcount (so that the sequence is of items from 0 to itemcount-1) or by specifying a min and a max (so that the sequence is of 
19   * items from min to max inclusive). After you construct the instance, you can change the number of items by calling nextInt(itemcount) or nextLong(itemcount).
20   *
21   * Unlike @ZipfianGenerator, this class scatters the "popular" items across the itemspace. Use this, instead of @ZipfianGenerator, if you
22   * don't want the head of the distribution (the popular items) clustered together.
23   */
24  public class ScrambledZipfianGenerator extends IntegerGenerator {
25      public static final double ZETAN = 26.46902820178302;
26      public static final double USED_ZIPFIAN_CONSTANT = 0.99;
27      public static final long ITEM_COUNT = 10000000000L;
28  
29      ZipfianGenerator gen;
30      long _min, _max, _itemcount;
31  
32      /******************************* Constructors **************************************/
33  
34      /**
35       * Create a zipfian generator for the specified number of items.
36       * @param _items The number of items in the distribution.
37       */
38      public ScrambledZipfianGenerator(long _items) {
39          this(0, _items - 1);
40      }
41  
42      /**
43       * Create a zipfian generator for items between min and max.
44       * @param _min The smallest integer to generate in the sequence.
45       * @param _max The largest integer to generate in the sequence.
46       */
47      public ScrambledZipfianGenerator(long _min, long _max) {
48          this(_min, _max, ZipfianGenerator.ZIPFIAN_CONSTANT);
49      }
50  
51      /**
52       * Create a zipfian generator for the specified number of items using the specified zipfian constant.
53       *
54       * @param _items The number of items in the distribution.
55       * @param _zipfianconstant The zipfian constant to use.
56       */
57      /*
58  // not supported, as the value of zeta depends on the zipfian constant, and we have only precomputed zeta for one zipfian constant
59  	public ScrambledZipfianGenerator(long _items, double _zipfianconstant)
60  	{
61  		this(0,_items-1,_zipfianconstant);
62  	}
63  */
64  
65      /**
66       * Create a zipfian generator for items between min and max (inclusive) for the specified zipfian constant. If you
67       * use a zipfian constant other than 0.99, this will take a long time to complete because we need to recompute zeta.
68       * @param min The smallest integer to generate in the sequence.
69       * @param max The largest integer to generate in the sequence.
70       * @param _zipfianconstant The zipfian constant to use.
71       */
72      public ScrambledZipfianGenerator(long min, long max, double _zipfianconstant) {
73          _min = min;
74          _max = max;
75          _itemcount = _max - _min + 1;
76          if (_zipfianconstant == USED_ZIPFIAN_CONSTANT) {
77              gen = new ZipfianGenerator(0, ITEM_COUNT, _zipfianconstant, ZETAN);
78          } else {
79              gen = new ZipfianGenerator(0, ITEM_COUNT, _zipfianconstant);
80          }
81      }
82  
83      /**************************************************************************************************/
84  
85      /**
86       * Return the next int in the sequence.
87       */
88      @Override
89      public int nextInt() {
90          return (int) nextLong();
91      }
92  
93      /**
94       * @return the next long in the sequence.
95       */
96      public long nextLong() {
97          long ret = gen.nextLong();
98          ret = _min + FNVhash64(ret) % _itemcount;
99          setLastInt((int) ret);
100         return ret;
101     }
102 
103     public static void main(String[] args) {
104         double newzetan = ZipfianGenerator.zetastatic(ITEM_COUNT, ZipfianGenerator.ZIPFIAN_CONSTANT);
105         System.out.println("zetan: " + newzetan);
106         System.exit(0);
107 
108         ScrambledZipfianGenerator gen = new ScrambledZipfianGenerator(10000);
109 
110         for (int i = 0; i < 1000000; i++) {
111             System.out.println("" + gen.nextInt());
112         }
113     }
114 
115     /**
116      * since the values are scrambled (hopefully uniformly), the mean is simply the middle of the range.
117      */
118     @Override
119     public double mean() {
120         return ((double) (_min + _max)) / 2.0;
121     }
122 
123     /**
124      * 64 bit FNV hash. Produces more "random" hashes than (say) String.hashCode().
125      *
126      * @param val The value to hash.
127      * @return The hash value
128      */
129     public static long FNVhash64(long val) {
130         //from http://en.wikipedia.org/wiki/Fowler_Noll_Vo_hash
131         long hashval = FNV_offset_basis_64;
132 
133         for (int i = 0; i < 8; i++) {
134             long octet = val & 0x00ff;
135             val = val >> 8;
136 
137             hashval = hashval ^ octet;
138             hashval = hashval * FNV_prime_64;
139             //hashval = hashval ^ octet;
140         }
141         return Math.abs(hashval);
142     }
143 
144     public static final long FNV_offset_basis_64 = 0xCBF29CE484222325L;
145     public static final long FNV_prime_64 = 1099511628211L;
146 
147 }