网页解析数据"/>
// // 链接并获取网页数据 // BOOL GetData(CString URL,CString filename) {HINTERNET hInternet ;HINTERNET hOpenUrl ;hInternet = InternetOpen("Application",INTERNET_OPEN_TYPE_PRECONFIG,NULL, NULL,INTERNET_FLAG_NO_CACHE_WRITE) ;if (hInternet != NULL){hOpenUrl = InternetOpenUrl(hInternet,URL,NULL,0,INTERNET_FLAG_RELOAD,0) ;if (hOpenUrl != NULL)// If Open URL OK{fstream file(filename,ios::out);BOOL bLoop ;DWORD lpdwNumberOfBytesRead, dwSize ;bLoop = true;dwSize = 512 ;LPSTR lpBuffer ;for (int row=1;bLoop;row++){lpBuffer = new char[dwSize+1] ;InternetReadFile(hOpenUrl,(LPVOID)lpBuffer,dwSize,&lpdwNumberOfBytesRead) ;if (lpdwNumberOfBytesRead == 0 || row == 200){bLoop = false ;}else if (row < 86) {continue;}else{lpBuffer[lpdwNumberOfBytesRead] = '/0';//---------------------------------------------int len=MultiByteToWideChar(CP_UTF8, 0, (LPCTSTR)lpBuffer, -1, NULL,0); unsigned short* wszGBK = new unsigned short[len+1]; memset(wszGBK, 0, len * 2 + 2); MultiByteToWideChar(CP_UTF8, 0, (LPCTSTR)lpBuffer, -1, (LPWSTR)wszGBK, len); len = WideCharToMultiByte(CP_ACP, 0, (LPCWSTR)wszGBK, -1, NULL, 0, NULL, NULL); char *szGBK=new char[len + 1]; memset(szGBK, 0, len + 1); WideCharToMultiByte (CP_ACP, 0, (LPCWSTR)wszGBK, -1, (LPSTR)szGBK, len, NULL,NULL); file<<szGBK;delete szGBK;delete wszGBK;//---------------------------------------------}delete lpBuffer;}file.close();}else{AfxMessageBox("Error: Can not open URL.");return FALSE;}if (hOpenUrl != NULL){InternetCloseHandle(hOpenUrl);}}elseAfxMessageBox("Error: Can not create Internet Session.");if (hInternet != NULL)InternetCloseHandle(hInternet) ;filecount ++ ; //又下载完毕了一个文件if (filecount == 38) {hThreadPD1 = CreateThread(//创建线程1NULL,0,(LPTHREAD_START_ROUTINE)ThreadFuncPD1,NULL,0,&ThreadIDPD1); WaitForSingleObject(hThreadPD1,0); //WaitForSingleObject(hThread,INFINITE);hThreadPD2 = CreateThread(//创建线程1NULL,0,(LPTHREAD_START_ROUTINE)ThreadFuncPD2,NULL,0,&ThreadIDPD2); WaitForSingleObject(hThreadPD2,INFINITE); }return TRUE; }// // 处理获取到的网页数据 // BOOL ProcessData(const char* infile,const char* outfile) {fstream file1,file2;file1.open(infile,ios::in);//读入数据文件 if(!file1){AfxMessageBox("抓取的网页TXT数据文件丢失,请先下载完毕!");return FALSE;}if (strcmp(infile,name101) == 0 || strcmp(infile,name201) == 0) {//第一次写文件 加标题file2.open(outfile,ios::out);file2<<"股票代码/t股票名称/t涨跌幅/t最新价/t涨跌额 "<<endl;///file2.clear();}else{file2.open(outfile,ios::out | ios::app);//写入数据文件}if(!file2){AfxMessageBox("定义的目录在本机不存在!");return FALSE;}file2.clear();char buffer[110];char code[7];char name[10];char old[10];char now[10];char start[10];const int _MAXROW = 3000;bool codeok=false;bool nameok=false;bool oldok =false;bool nowok =false;bool startok =false;for(int row=0;row<_MAXROW && !file1.eof() ; row++){file1.clear();file1.getline(buffer,110,'/n');//从文件获取一行try{for(int i=1;i<110 && buffer[i] ;i++){//分析那一行数据//----------------------------------------------------------------if (!codeok && buffer[i-1] == 'n' && buffer[i] == 'k' && buffer[i+1] == '"' && buffer[i+2] == '>') {//找到了股票代码for(int j=0;j<6;j++,i++)code[j] = buffer[i+3];//入数组code[6]=NULL;codeok = true;break;//分析一行数据结束}//----------------------------------------------------------------if (codeok && !nameok && buffer[i] == 'k' && buffer[i+1] == '"' && buffer[i+2] == '>') {//找到了股票的名称for(int j=0;j<8;j++,i++){name[j] = buffer[i+3];//入数组if (buffer[i+4]=='<') break;}name[j+1]=NULL;nameok = true;break;//分析一行数据结束}//----------------------------------------------------------------if (codeok && !nowok && buffer[i-1] == '"' && buffer[i] == '>') {//找到了股票的当前价格for(int j=0;buffer[i]!='<';j++,i++)now[j] = buffer[i+1];//入数组now[j-1]=NULL;nowok = true;break;//分析一行数据结束}//----------------------------------------------------------------if (codeok && !oldok && buffer[i-1] == '"' && buffer[i] == '>') {//找到了股票的涨跌幅for(int j=0;buffer[i]!='<';j++,i++)old[j] = buffer[i+1];//入数组old[j-1]=NULL;oldok = true;break;//分析一行数据结束}//----------------------------------------------------------------if (codeok && !startok && buffer[i-1] == '"' && buffer[i] == '>') {//找到了股票的涨跌额for(int j=0;buffer[i]!='<';j++,i++)start[j] = buffer[i+1];//入数组start[j-1]=NULL;startok = true;//最后一个不要break }//----------------------------------------------------------------if (codeok && nameok && nowok && oldok && startok) {//所有数据获取完毕codeok = false;nameok = false;nowok = false;oldok = false;startok =false;file2<<code<<"/t/t"<<name<<"/t"<<old<<"/t/t"<<now<<"/t/t"<<start<<"/t"<<endl;break;//一行结束,也是一只股票读取结束}}//分析一行throw i;}//trycatch (int) {}}//分析文件file1.close(); file2.close();return TRUE; }
更多推荐
VC++抓取网页解析数据
>www.elefans.com
编程频道|电子爱好者 - 技术资讯及电子产品介绍!
发布评论