`

[记录]千万数据存入mysql

 
阅读更多

存入1千万条数据,用hibernate存

数据的定义都很简单:

    -- Table behind the TestEntity mapping: a string primary key plus two
    -- optional free-text columns.
    CREATE TABLE testEntity2 (
        id          VARCHAR(255) NOT NULL PRIMARY KEY,
        description VARCHAR(255),
        name        VARCHAR(255)
    )

 主键生成策略UUID

 

防止数据过大 

设置  <property name="hibernate.jdbc.batch_size">50</property> 

同时代码里也是每保存50条就flush一次、clear一次。在eclipse的环境中，内存占用在500M左右。

 

每10w条打印一次（输出格式为“第几个10w条:累计耗时秒数”）。如果不使用batch_size，也不flush和clear，内存会在2G左右，

并且也会发生内存溢出的错误:(但前面几次的速度非常可观 到第280万条的时候堆溢出)

但这些数据只是存在session的缓存里，并没有真正commit到数据库，所以到底有多快……不好说……

正在插入
1:1
2:2
3:2
4:3
5:3
6:5
7:5
8:7
9:7
10:7
11:8
12:8
13:8
14:11
15:11
16:11
17:12
18:15
19:15
20:15
21:16
22:16
23:17
24:17
25:24
26:28
27:35
28:49
Exception in thread "main" java.lang.OutOfMemoryError: GC overhead limit exceeded
	at org.hibernate.engine.internal.StatefulPersistenceContext.addEntry(StatefulPersistenceContext.java:539)
	at org.hibernate.event.internal.AbstractSaveEventListener.performSaveOrReplicate(AbstractSaveEventListener.java:249)
	at org.hibernate.event.internal.AbstractSaveEventListener.performSave(AbstractSaveEventListener.java:192)
	at org.hibernate.event.internal.AbstractSaveEventListener.saveWithGeneratedId(AbstractSaveEventListener.java:135)
	at org.hibernate.event.internal.DefaultSaveOrUpdateEventListener.saveWithGeneratedOrRequestedId(DefaultSaveOrUpdateEventListener.java:206)
	at org.hibernate.event.internal.DefaultSaveEventListener.saveWithGeneratedOrRequestedId(DefaultSaveEventListener.java:55)
	at org.hibernate.event.internal.DefaultSaveOrUpdateEventListener.entityIsTransient(DefaultSaveOrUpdateEventListener.java:191)
	at org.hibernate.event.internal.DefaultSaveEventListener.performSaveOrUpdate(DefaultSaveEventListener.java:49)
	at org.hibernate.event.internal.DefaultSaveOrUpdateEventListener.onSaveOrUpdate(DefaultSaveOrUpdateEventListener.java:90)
	at org.hibernate.internal.SessionImpl.fireSave(SessionImpl.java:764)
	at org.hibernate.internal.SessionImpl.save(SessionImpl.java:756)
	at org.hibernate.internal.SessionImpl.save(SessionImpl.java:752)
	at org.cc.data.test.DataGenerator.main(DataGenerator.java:25)

 

 

然后分别做一下测试 先把batch_size设置为10 每10次 flush和clear:

正在插入
1:37
2:72
3:108
4:144
.......

 50和100的测试结果和这个也大同小异，就不贴出来了

 

 

 

 

其他的以后再写了..

 

 

 

 

代码:

package org.cc.data.test;

import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.GeneratedValue;
import javax.persistence.Id;
import javax.persistence.Table;

import org.hibernate.annotations.GenericGenerator;

/**
 * Persistent entity mapped to the {@code testEntity2} table.
 *
 * <p>Carries a generated string identifier plus two plain string columns,
 * {@code name} and {@code description}.
 */
@Entity
@Table(name="testEntity2")
public class TestEntity {

	/** Primary key, filled in by the generator named "uuid" (strategy "uuid"). */
	@Id
	@GenericGenerator(strategy="uuid", name = "uuid")
	@GeneratedValue(generator="uuid")
	private String id;

	/** Value stored in the {@code name} column. */
	@Column(name="name")
	private String name;

	/** Value stored in the {@code description} column. */
	@Column(name="description")
	private String description;

	/**
	 * No-argument constructor for the persistence provider.
	 *
	 * <p>JPA requires every entity to expose a public or protected no-arg
	 * constructor so the provider can instantiate it when loading rows.
	 * It is protected so application code keeps using the two-argument
	 * constructor.
	 */
	protected TestEntity() {
	}

	/**
	 * Creates a new, not-yet-persisted entity.
	 *
	 * @param name        value for the {@code name} column
	 * @param description value for the {@code description} column
	 */
	public TestEntity(String name, String description) {
		this.name = name;
		this.description = description;
	}

	/** @return the identifier, or {@code null} if none has been assigned yet */
	public String getId() {
		return id;
	}

	/** @param id the identifier to set */
	public void setId(String id) {
		this.id = id;
	}

	/** @return the current name */
	public String getName() {
		return name;
	}

	/** @param name the name to set */
	public void setName(String name) {
		this.name = name;
	}

	/** @return the current description */
	public String getDescription() {
		return description;
	}

	/** @param description the description to set */
	public void setDescription(String description) {
		this.description = description;
	}

}

 

package org.cc.data.test;


import org.hibernate.Session;
import org.hibernate.SessionFactory;
import org.hibernate.Transaction;
import org.hibernate.cfg.Configuration;
import org.hibernate.service.ServiceRegistry;
import org.hibernate.service.ServiceRegistryBuilder;

/**
 * Bulk-insert benchmark: saves {@link #TOTAL_ROWS} {@code TestEntity} rows
 * through a single Hibernate session and transaction, printing the elapsed
 * time as it goes.
 */
public class DataGenerator {

	/** Total number of entities to save. */
	private static final int TOTAL_ROWS = 10000000;

	/**
	 * Number of saves between flush()/clear() calls. Keep this equal to the
	 * hibernate.jdbc.batch_size setting so each flush maps to one JDBC batch.
	 */
	private static final int BATCH_SIZE = 50;

	/** A progress line is printed every this many saved entities. */
	private static final int REPORT_INTERVAL = 100000;

	/**
	 * Runs the benchmark.
	 *
	 * <p>The session is always closed and the session factory always shut
	 * down, and the transaction is rolled back if anything fails before the
	 * commit completes.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		Configuration cfg = new Configuration().configure();
		ServiceRegistry sr = new ServiceRegistryBuilder().applySettings(
				cfg.getProperties()).buildServiceRegistry();
		SessionFactory sf = cfg.buildSessionFactory(sr);
		try {
			Session session = sf.openSession();
			Transaction trans = null;
			boolean committed = false;
			long timeNow;
			try {
				trans = session.beginTransaction();
				timeNow = System.currentTimeMillis();
				System.out.println("正在插入");
				for (int i = 0; i < TOTAL_ROWS; i++) {
					session.save(new TestEntity("cc" + i, "desc:" + i));
					int count = i + 1;
					if (count % BATCH_SIZE == 0) {
						// Push the pending inserts to the database, then drop
						// the saved entities from the session cache so it
						// does not grow without bound.
						session.flush();
						session.clear();
					}
					if (count % REPORT_INTERVAL == 0) {
						long timeTemp = System.currentTimeMillis();
						// Output format: <n-th interval>:<elapsed seconds>
						System.out.println((count / REPORT_INTERVAL) + ":" + ((timeTemp - timeNow) / 1000));
					}
				}
				trans.commit();
				committed = true;
			} finally {
				try {
					// Undo the partial work if the commit was never reached.
					if (!committed && trans != null) {
						trans.rollback();
					}
				} finally {
					session.close();
				}
			}
			long timeNow2 = System.currentTimeMillis();
			System.out.println("time past:" + (timeNow2 - timeNow));
		} finally {
			// Release the connection pool and other factory-level resources.
			sf.close();
		}
	}

}

 

<?xml version="1.0" encoding="UTF-8"?>
<!-- Hibernate session-factory configuration for the bulk-insert test. -->
<!-- The DTD system id uses the hibernate.org namespace; Hibernate 4 logs a
     warning for the obsolete hibernate.sourceforge.net one. -->
<!DOCTYPE hibernate-configuration PUBLIC "-//Hibernate/Hibernate Configuration DTD 3.0//EN"
                                         "http://www.hibernate.org/dtd/hibernate-configuration-3.0.dtd">
<hibernate-configuration>
 <session-factory>
  <property name="hibernate.connection.driver_class">com.mysql.jdbc.Driver</property>
  <property name="hibernate.connection.password">密码</property>
  <!-- rewriteBatchedStatements=true lets MySQL Connector/J rewrite a JDBC
       batch into multi-row INSERTs; without it every row in the batch is
       still sent as a separate statement. -->
  <property name="hibernate.connection.url">jdbc:mysql://localhost:3306/test?rewriteBatchedStatements=true</property>
  <property name="hibernate.connection.username">用户名</property>
  <property name="hibernate.dialect">org.hibernate.dialect.MySQLDialect</property>
  <property name="hibernate.show_sql">false</property>
  <property name="hibernate.format_sql">true</property>
  <!-- Keep in sync with the flush()/clear() interval used in DataGenerator. -->
  <property name="hibernate.jdbc.batch_size">50</property> 
  <mapping class="org.cc.data.test.TestEntity"/>
 </session-factory>
</hibernate-configuration>

 

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics