归并排序算法描述+代码实现
归并排序,主要的是一种思想,在处理大数据的时候比较的常见的一种思想。例如:一个10G的关键词的log,找出词频最高的前K个词,设可用内存为2G左右。
归并思想:将数据分为5段,分段时对每个词hash,hash值在一个范围中的词语分到一个段中,然后对于每个分段统计的hash结果直接都写入一个文件即可。
分析:使用hash分段,保证各小段没有重复的词。我当时想的方法是将词语的首字拼音打头一样的分在一段中,例如“五谷丰登”、“万箭齐发”分到一起,都是w打头的拼音,其实这仅仅只是hash的一种,至于具体的hash函数,就不讨论了
/**
* @author zhailz
* 归并排序,稳定的算法,最好的情况O(nlogn),最坏的情况O(nlogn),平均的情况是O(nlogn)
*/
public class MergeSort {
public static void main(String[] args) {
int[] arrays = new int[] { 5, 1, 6, 2, 4, 5, Integer.MAX_VALUE, 7, 0, Integer.MAX_VALUE, 2, 3,
5, 7, 0, 1, 2, 3, 8 };
System.out.println(Arrays.toString(arrays));
System.out.println(arrays.length);
arrays = mergeSort(arrays);
System.out.println(Arrays.toString(arrays));
System.out.println(arrays.length);
}
public static int[] mergeSort(int[] arrays) {
return arrayMergeSort(arrays, 0, arrays.length - 1);
}
private static int[] arrayMergeSort(int[] arrays, int i, int j) {
if (i < 0 || j > arrays.length - 1) {
throw new IllegalArgumentException("");
}
if (i < j) {
int m = (i + j) / 2;
arrayMergeSort(arrays, i, m);
arrayMergeSort(arrays, m + 1, j);
arrayMergeSortContentNoFlag(arrays, i, m, j);
}
return arrays;
}
public static void arrayMergeSortContent(int[] arrays, int i, int m, int j) {
int first = m - i + 1;
int second = j - m;
int[] firsta = new int[first + 1];
int[] seconda = new int[second + 1];
for (int k = i; k <= m; k++) {
firsta[k - i] = arrays[k];
}
for (int k = m + 1; k <= j; k++) {
seconda[k - m - 1] = arrays[k];
}
firsta[first] = Integer.MAX_VALUE;
seconda[second] = Integer.MAX_VALUE;
int ff = 0;
int ss = 0;
for (int k = i; k <= j; k++) {
if (firsta[ff] < seconda[ss]) {
arrays[k] = firsta[ff];
ff++;
} else {
arrays[k] = seconda[ss];
ss++;
}
}
}
public static void arrayMergeSortContentNoFlag(int[] arrays, int i, int m, int j) {
int first = m - i + 1;
int second = j - m;
int[] firsta = new int[first];
int[] seconda = new int[second];
for (int k = i; k <= m; k++) {
firsta[k - i] = arrays[k];
}
for (int k = m + 1; k <= j; k++) {
seconda[k - m - 1] = arrays[k];
}
int ff = 0;
int ss = 0;
int k = i;
for (; k <= j && ff < firsta.length && ss < seconda.length; k++) {
if (firsta[ff] < seconda[ss]) {
arrays[k] = firsta[ff];
ff++;
} else {
arrays[k] = seconda[ss];
ss++;
}
}
if (ff <= firsta.length - 1) {
for (; ff < firsta.length; k++, ff++) {
arrays[k] = firsta[ff];
}
}
if (ss <= seconda.length - 1) {
for (; ff < seconda.length; k++, ss++) {
arrays[k] = seconda[ss];
}
}
}
}