我正在从excel中提取这种格式的数据
I am extracting data from excel that is in this format
product1 | unnamedcol2 | product2 | unnamedcol4 | product3 | unnamedcol6 | ------------------------------------------------------------------------------- @1foo | 1.10 | @1foo | 0.3 | @1foo | 0.3 @2foo | 1.00 | @2foo | 2 | @2foo | @3foo | 1.52 | @3foo | 2.53 | @3foo | @4foo | 1.47 | | | @4foo | 1.31 @5foo | 1.49 | | | @5foo | 1.31该文件使用所有255个字段.我使用dapper-dot-net通过此代码获取数据
The file uses all 255 fields. Using dapper-dot-net i get the data through this code
IEnumerable<IDictionary<string, object>> excelDataRaw = conn.Query(string.Format("select * from {0}", table)).Cast<IDictionary<string, object>>();我将此数据传递给这些测试方法.数据作为IDictionaries的IEnumerable返回,其中每个键是一个产品,每个值是一个IDictionary,其中每个键是来自产品列的值,而对应的值是来自unnamedcol的值,该值位于产品列的右侧.
I pass this data to these test methods. The data is returned as an IEnumerable of IDictionaries where each key is a product and each value is an IDictionary where each key is a value from the product column and the corresponding value is a value from unnamedcol that is to the right of the product column.
var excelDataRefined = new List<IDictionary<string, IDictionary<string, decimal>>>(); excelDataRefined.Add(new Dictionary<string, IDictionary<string, decimal>>()); excelDataRefined[0].Add( "product", new Dictionary<string, decimal>()); excelDataRefined[0]["product"].Add("@1foo", 1.1m);方法:
private static Dictionary<string, IDictionary<string, decimal>> Benchmark_foreach(IEnumerable<IDictionary<string, object>> excelDataRaw) { Console.WriteLine("1. Using foreach"); var watch = new Stopwatch(); watch.Start(); List<string> headers = excelDataRaw.Select(dictionary => dictionary.Keys).First().ToList(); bool isEven = false; List<string> products = headers.Where(h => isEven = !isEven).ToList(); var dates = new List<IEnumerable<object>>(); var prices = new List<IEnumerable<object>>(); foreach (string field in headers) { string product1 = field; if (headers.IndexOf(field) % 2 == 0) { dates.Add( excelDataRaw.AsParallel().AsOrdered().Select(col => col[product1]).Where(row => row != null)); } if (headers.IndexOf(field) % 2 == 1) { prices.Add( excelDataRaw.AsParallel().AsOrdered().Select(col => col[product1] ?? 0m).Take(dates.Last().Count())); } } watch.Stop(); Console.WriteLine("Rearange the data in: {0}s", watch.Elapsed.TotalSeconds); watch.Restart(); var excelDataRefined = new Dictionary<string, IDictionary<string, decimal>>(); foreach (IEnumerable<object> datelist in dates) { decimal num; IEnumerable<object> datelist1 = datelist; IEnumerable<object> pricelist = prices[dates.IndexOf(datelist1)].Select(value => value ?? 0m).Where( content => decimal.TryParse(content.ToString(), out num)); Dictionary<string, decimal> dict = datelist1.Zip(pricelist, (k, v) => new { k, v }).ToDictionary( x => (string)x.k, x => decimal.Parse(x.v.ToString())); if (!excelDataRefined.ContainsKey(products[dates.IndexOf(datelist1)])) { excelDataRefined.Add(products[dates.IndexOf(datelist1)], dict); } } watch.Stop(); Console.WriteLine("Zipped the data in: {0}s", watch.Elapsed.TotalSeconds); return excelDataRefined; } private static Dictionary<string, IDictionary<string, decimal>> Benchmark_AsParallel(IEnumerable<IDictionary<string, object>> excelDataRaw) { Console.WriteLine("2. Using AsParallel().AsOrdered().ForAll"); var watch = new Stopwatch(); watch.Start(); List<string> headers = excelDataRaw.Select(dictionary => dictionary.Keys).First().ToList(); bool isEven = false; List<string> products = headers.Where(h => isEven = !isEven).ToList(); var dates = new List<IEnumerable<object>>(); var prices = new List<IEnumerable<object>>(); headers.AsParallel().AsOrdered().ForAll( field => dates.Add( excelDataRaw.AsParallel().AsOrdered().TakeWhile(x => headers.IndexOf(field) % 2 == 0).Select( col => col[field]).Where(row => row != null).ToList())); headers.AsParallel().AsOrdered().ForAll( field => prices.Add( excelDataRaw.AsParallel().AsOrdered().TakeWhile(x => headers.IndexOf(field) % 2 == 1).Select( col => col[field] ?? 0m).Take(256).ToList())); dates.RemoveAll(x => x.Count() == 0); prices.RemoveAll(x => x.Count() == 0); watch.Stop(); Console.WriteLine("Rearange the data in: {0}s", watch.Elapsed.TotalSeconds); watch.Restart(); var excelDataRefined = new Dictionary<string, IDictionary<string, decimal>>(); foreach (IEnumerable<object> datelist in dates) { decimal num; IEnumerable<object> datelist1 = datelist; IEnumerable<object> pricelist = prices[dates.IndexOf(datelist1)].Select(value => value ?? 0m).Where( content => decimal.TryParse(content.ToString(), out num)); Dictionary<string, decimal> dict = datelist1.Zip(pricelist, (k, v) => new { k, v }).ToDictionary( x => (string)x.k, x => decimal.Parse(x.v.ToString())); if (!excelDataRefined.ContainsKey(products[dates.IndexOf(datelist1)])) { excelDataRefined.Add(products[dates.IndexOf(datelist1)], dict); } } watch.Stop(); Console.WriteLine("Zipped the data in: {0}s", watch.Elapsed.TotalSeconds); return excelDataRefined; } private static Dictionary<string, IDictionary<string, decimal>> Benchmark_ForEach(IEnumerable<IDictionary<string, object>> excelDataRaw) { Console.WriteLine("3. Using ForEach"); var watch = new Stopwatch(); watch.Start(); List<string> headers = excelDataRaw.Select(dictionary => dictionary.Keys).First().ToList(); bool isEven = false; List<string> products = headers.Where(h => isEven = !isEven).ToList(); var dates = new List<IEnumerable<object>>(); var prices = new List<IEnumerable<object>>(); headers.ForEach( field => dates.Add( excelDataRaw.TakeWhile(x => headers.IndexOf(field) % 2 == 0).Select(col => col[field]).Where( row => row != null).ToList())); headers.ForEach( field => prices.Add( excelDataRaw.TakeWhile(x => headers.IndexOf(field) % 2 == 1).Select(col => col[field] ?? 0m). Take(256).ToList())); dates.RemoveAll(x => x.Count() == 0); prices.RemoveAll(x => x.Count() == 0); watch.Stop(); Console.WriteLine("Rearange the data in: {0}s", watch.Elapsed.TotalSeconds); watch.Restart(); var excelDataRefined = new Dictionary<string, IDictionary<string, decimal>>(); foreach (IEnumerable<object> datelist in dates) { decimal num; IEnumerable<object> datelist1 = datelist; IEnumerable<object> pricelist = prices[dates.IndexOf(datelist1)].Select(value => value ?? 0m).Where( content => decimal.TryParse(content.ToString(), out num)); Dictionary<string, decimal> dict = datelist1.Zip(pricelist, (k, v) => new { k, v }).ToDictionary( x => (string)x.k, x => decimal.Parse(x.v.ToString())); if (!excelDataRefined.ContainsKey(products[dates.IndexOf(datelist1)])) { excelDataRefined.Add(products[dates.IndexOf(datelist1)], dict); } } watch.Stop(); Console.WriteLine("Zipped the data in: {0}s", watch.Elapsed.TotalSeconds); return excelDataRefined; }
- Benchmark_foreach需要应用. 3,5s重新排列,3s压缩数据.
- Benchmark_AsParallel需要应用.重新排列12s,压缩数据0.005s.
- Benchmark_ForEach需要应用.重新排列16s,压缩数据0.005s.
为什么会这样?我期望AsParallel最快,因为它并行执行而不是顺序执行.我该如何优化呢?
Why does it behave like this? I expected AsParallel to be the fastest because it executes in parallel instead of sequential. Ho do i optimize this?
推荐答案要进行并行计算,您必须具有多个处理器或内核,否则,您只是在线程池中排队等待CPU的任务. IE.单核心计算机上的AsParallel是顺序的,加上线程池和线程上下文切换的开销.即使在两核计算机上,您也可能无法同时获得两个核,因为许多其他事情都在同一计算机上运行.
In order for parallel computation to happen you have to have multiple processors or cores, otherwise you are just queueing up tasks in the threadpool waiting for the CPU. I.e. AsParallel on a single core machine is sequential plus the overhead of threadpool and thread context switch. Even on a two core machine, you may not get both cores, since lots of other things are running on the same machine.
仅当您具有带有阻塞操作(I/O)的长时间运行任务时,真正的.AsParallel()才有用,操作系统可以挂起阻塞线程并让另一个线程运行.
Really .AsParallel() only becomes useful if you have long running tasks with blocking operations (I/O) where the OS can suspend the blocking thread and let another one run.
更多推荐
在这种情况下,为什么使用AsParallel()比foreach慢?
发布评论